diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 251c2f9bb25a..fea518d18715 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -138,7 +138,8 @@ private: bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset, SDValue &ImmOffset, SDValue &VOffset) const; - bool SelectFlat(SDValue Addr, SDValue &VAddr, SDValue &SLC) const; + bool SelectFlat(SDValue Addr, SDValue &VAddr, + SDValue &Offset, SDValue &SLC) const; bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, bool &Imm) const; @@ -1315,8 +1316,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset, bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr, SDValue &VAddr, + SDValue &Offset, SDValue &SLC) const { VAddr = Addr; + Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i16); SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1); return true; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index a7eac080f885..e54c887d6090 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -126,8 +126,9 @@ bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const { MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD)) .add(I.getOperand(1)) .add(I.getOperand(0)) - .addImm(0) - .addImm(0); + .addImm(0) // offset + .addImm(0) // glc + .addImm(0); // slc // Now that we selected an opcode, we need to constrain the register @@ -392,8 +393,9 @@ bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const { MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode)) .add(I.getOperand(0)) .addReg(PtrReg) - .addImm(0) - .addImm(0); + .addImm(0) // offset + .addImm(0) // glc + .addImm(0); // slc bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI); I.eraseFromParent(); diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 16e3b7b4ebee..871aa089b621 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -285,6 +285,9 @@ public: bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); } bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } + + bool isOffsetU12() const { return isImmTy(ImmTyOffset) && isUInt<12>(getImm()); } + bool isOffsetS13() const { return isImmTy(ImmTyOffset) && isInt<13>(getImm()); } bool isGDS() const { return isImmTy(ImmTyGDS); } bool isGLC() const { return isImmTy(ImmTyGLC); } bool isSLC() const { return isImmTy(ImmTySLC); } @@ -886,6 +889,10 @@ public: return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; } + bool hasFlatOffsets() const { + return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; + } + bool hasSGPR102_SGPR103() const { return !isVI(); } @@ -1034,6 +1041,8 @@ public: AMDGPUOperand::Ptr defaultSMRDOffset8() const; AMDGPUOperand::Ptr defaultSMRDOffset20() const; AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; + AMDGPUOperand::Ptr defaultOffsetU12() const; + AMDGPUOperand::Ptr defaultOffsetS13() const; OperandMatchResultTy parseOModOperand(OperandVector &Operands); @@ -1970,6 +1979,15 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { } } + if ((TSFlags & 
SIInstrFlags::FLAT) && !hasFlatOffsets()) { + // FIXME: Produces error without correct column reported. + auto OpNum = + AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset); + const auto &Op = Inst.getOperand(OpNum); + if (Op.getImm() != 0) + return Match_InvalidOperand; + } + return Match_Success; } @@ -3849,6 +3867,14 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); } +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); +} + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); +} + //===----------------------------------------------------------------------===// // vop3 //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 8ba9efd42c70..0848853c00b8 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// -def FLATAtomic : ComplexPattern; +def FLATAtomic : ComplexPattern; //===----------------------------------------------------------------------===// // FLAT classes @@ -55,6 +55,8 @@ class FLAT_Real op, FLAT_Pseudo ps> : // copy relevant pseudo op flags let SubtargetPredicate = ps.SubtargetPredicate; let AsmMatchConverter = ps.AsmMatchConverter; + let TSFlags = ps.TSFlags; + let UseNamedOperandTable = ps.UseNamedOperandTable; // encoding fields bits<8> vaddr; @@ -63,10 +65,23 @@ class FLAT_Real op, FLAT_Pseudo ps> : bits<1> slc; bits<1> glc; + // Only valid on gfx9 + bits<1> lds = 0; // XXX - What does this actually do? + bits<2> seg; // Segment, 00=flat, 01=scratch, 10=global, 11=reserved + + // Signed offset. Highest bit ignored for flat and treated as 12-bit + // unsigned for flat acceses. + bits<13> offset; + bits<1> nv = 0; // XXX - What does this actually do? + // We don't use tfe right now, and it was removed in gfx9. bits<1> tfe = 0; - // 15-0 is reserved. + // Only valid on GFX9+ + let Inst{12-0} = offset; + let Inst{13} = lds; + let Inst{15-14} = 0; + let Inst{16} = !if(ps.has_glc, glc, ps.glcValue); let Inst{17} = slc; let Inst{24-18} = op; @@ -74,24 +89,30 @@ class FLAT_Real op, FLAT_Pseudo ps> : let Inst{39-32} = vaddr; let Inst{47-40} = !if(ps.has_data, vdata, ?); // 54-48 is reserved. - let Inst{55} = tfe; + let Inst{55} = nv; // nv on GFX9+, TFE before. 
let Inst{63-56} = !if(ps.has_vdst, vdst, ?); } -class FLAT_Load_Pseudo : FLAT_Pseudo< +class FLAT_Load_Pseudo : FLAT_Pseudo< opName, (outs regClass:$vdst), - (ins VReg_64:$vaddr, GLC:$glc, slc:$slc), - " $vdst, $vaddr$glc$slc"> { + !if(HasSignedOffset, + (ins VReg_64:$vaddr, offset_s13:$offset, GLC:$glc, slc:$slc), + (ins VReg_64:$vaddr, offset_u12:$offset, GLC:$glc, slc:$slc)), + " $vdst, $vaddr$offset$glc$slc"> { let has_data = 0; let mayLoad = 1; } -class FLAT_Store_Pseudo : FLAT_Pseudo< +class FLAT_Store_Pseudo : FLAT_Pseudo< opName, (outs), - (ins VReg_64:$vaddr, vdataClass:$vdata, GLC:$glc, slc:$slc), - " $vaddr, $vdata$glc$slc"> { + !if(HasSignedOffset, + (ins VReg_64:$vaddr, vdataClass:$vdata, offset_s13:$offset, GLC:$glc, slc:$slc), + (ins VReg_64:$vaddr, vdataClass:$vdata, offset_u12:$offset, GLC:$glc, slc:$slc)), + " $vaddr, $vdata$offset$glc$slc"> { let mayLoad = 0; let mayStore = 1; let has_vdst = 0; @@ -103,12 +124,15 @@ multiclass FLAT_Atomic_Pseudo< ValueType vt, SDPatternOperator atomic = null_frag, ValueType data_vt = vt, - RegisterClass data_rc = vdst_rc> { + RegisterClass data_rc = vdst_rc, + bit HasSignedOffset = 0> { def "" : FLAT_Pseudo , AtomicNoRet { let mayLoad = 1; @@ -121,10 +145,12 @@ multiclass FLAT_Atomic_Pseudo< def _RTN : FLAT_Pseudo , + (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>, AtomicNoRet { let mayLoad = 1; let mayStore = 1; @@ -313,30 +339,30 @@ def flat_truncstorei16 : flat_st ; // Patterns for global loads with no offset. class FlatLoadPat : Pat < (vt (node i64:$addr)), - (inst $addr, 0, 0) + (inst $addr, 0, 0, 0) >; class FlatLoadAtomicPat : Pat < (vt (node i64:$addr)), - (inst $addr, 1, 0) + (inst $addr, 0, 1, 0) >; class FlatStorePat : Pat < (node vt:$data, i64:$addr), - (inst $addr, $data, 0, 0) + (inst $addr, $data, 0, 0, 0) >; class FlatStoreAtomicPat : Pat < // atomic store follows atomic binop convention so the address comes // first. 
(node i64:$addr, vt:$data), - (inst $addr, $data, 1, 0) + (inst $addr, $data, 0, 1, 0) >; class FlatAtomicPat : Pat < (vt (node i64:$addr, data_vt:$data)), - (inst $addr, $data, 0) + (inst $addr, $data, 0, 0) >; let Predicates = [isCIVI] in { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 445bf79a7814..470a47b02443 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -492,11 +492,21 @@ class NamedOperandU8 : Operand { let ParserMatchClass = MatchClass; } +class NamedOperandU12 : Operand { + let PrintMethod = "print"#Name; + let ParserMatchClass = MatchClass; +} + class NamedOperandU16 : Operand { let PrintMethod = "print"#Name; let ParserMatchClass = MatchClass; } +class NamedOperandS13 : Operand { + let PrintMethod = "print"#Name; + let ParserMatchClass = MatchClass; +} + class NamedOperandU32 : Operand { let PrintMethod = "print"#Name; let ParserMatchClass = MatchClass; @@ -514,6 +524,8 @@ def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>; def idxen : NamedOperandBit<"Idxen", NamedMatchClass<"Idxen">>; def addr64 : NamedOperandBit<"Addr64", NamedMatchClass<"Addr64">>; +def offset_u12 : NamedOperandU12<"Offset", NamedMatchClass<"OffsetU12">>; +def offset_s13 : NamedOperandS13<"Offset", NamedMatchClass<"OffsetS13">>; def offset : NamedOperandU16<"Offset", NamedMatchClass<"Offset">>; def offset0 : NamedOperandU8<"Offset0", NamedMatchClass<"Offset0">>; def offset1 : NamedOperandU8<"Offset1", NamedMatchClass<"Offset1">>; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir index 2a3d3887ed69..56a9e7022db9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir @@ -14,7 +14,7 @@ regBankSelected: true # GCN: global_addrspace # GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1 -# GCN: FLAT_LOAD_DWORD [[PTR]], 0, 0 +# GCN: FLAT_LOAD_DWORD [[PTR]], 0, 0, 0 body: | bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir index 89be3bde94a8..ea435725bf25 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir @@ -15,7 +15,7 @@ regBankSelected: true # GCN: global_addrspace # GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1 # GCN: [[VAL:%[0-9]+]] = COPY %vgpr2 -# GCN: FLAT_STORE_DWORD [[PTR]], [[VAL]], 0, 0 +# GCN: FLAT_STORE_DWORD [[PTR]], [[VAL]], 0, 0, 0 body: | bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir index bc992ed77ffd..62b47beb1251 100644 --- a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir +++ b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir @@ -219,19 +219,19 @@ body: | %34 = V_MOV_B32_e32 63, implicit %exec %27 = V_AND_B32_e64 %26, %24, implicit %exec - FLAT_STORE_DWORD %37, %27, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %37, %27, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %28 = V_AND_B32_e64 %24, %26, implicit %exec - FLAT_STORE_DWORD %37, %28, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %37, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %29 = V_AND_B32_e32 %26, %24, 
implicit %exec - FLAT_STORE_DWORD %37, %29, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %37, %29, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %30 = V_AND_B32_e64 %26, %26, implicit %exec - FLAT_STORE_DWORD %37, %30, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %37, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %31 = V_AND_B32_e64 %34, %34, implicit %exec - FLAT_STORE_DWORD %37, %31, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %37, %31, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) S_ENDPGM @@ -407,34 +407,34 @@ body: | %27 = S_MOV_B32 -4 %11 = V_LSHLREV_B32_e64 12, %10, implicit %exec - FLAT_STORE_DWORD %20, %11, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %12 = V_LSHLREV_B32_e64 %7, 12, implicit %exec - FLAT_STORE_DWORD %20, %12, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %13 = V_LSHL_B32_e64 %7, 12, implicit %exec - FLAT_STORE_DWORD %20, %13, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %14 = V_LSHL_B32_e64 12, %7, implicit %exec - FLAT_STORE_DWORD %20, %14, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %15 = V_LSHL_B32_e64 12, %24, implicit %exec - FLAT_STORE_DWORD %20, %15, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %22 = V_LSHL_B32_e64 %6, 12, implicit %exec - FLAT_STORE_DWORD %20, %22, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %23 = V_LSHL_B32_e64 %6, 32, implicit %exec - FLAT_STORE_DWORD %20, %23, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %25 = V_LSHL_B32_e32 %6, %6, implicit %exec - FLAT_STORE_DWORD %20, %25, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %26 = V_LSHLREV_B32_e32 11, %24, implicit %exec - FLAT_STORE_DWORD %20, %26, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %28 = V_LSHL_B32_e32 %27, %6, implicit %exec - FLAT_STORE_DWORD %20, %28, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) S_ENDPGM @@ -615,34 +615,34 @@ body: | %35 = V_MOV_B32_e32 2, 
implicit %exec %11 = V_ASHRREV_I32_e64 8, %10, implicit %exec - FLAT_STORE_DWORD %20, %11, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %12 = V_ASHRREV_I32_e64 %8, %10, implicit %exec - FLAT_STORE_DWORD %20, %12, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %13 = V_ASHR_I32_e64 %7, 3, implicit %exec - FLAT_STORE_DWORD %20, %13, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %14 = V_ASHR_I32_e64 7, %32, implicit %exec - FLAT_STORE_DWORD %20, %14, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %15 = V_ASHR_I32_e64 %27, %24, implicit %exec - FLAT_STORE_DWORD %20, %15, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %22 = V_ASHR_I32_e64 %6, 4, implicit %exec - FLAT_STORE_DWORD %20, %22, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %23 = V_ASHR_I32_e64 %6, %33, implicit %exec - FLAT_STORE_DWORD %20, %23, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %25 = V_ASHR_I32_e32 %34, %34, implicit %exec - FLAT_STORE_DWORD %20, %25, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %26 = V_ASHRREV_I32_e32 11, %10, implicit %exec - FLAT_STORE_DWORD %20, %26, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %28 = V_ASHR_I32_e32 %27, %35, implicit %exec - FLAT_STORE_DWORD %20, %28, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) S_ENDPGM @@ -824,34 +824,34 @@ body: | %35 = V_MOV_B32_e32 2, implicit %exec %11 = V_LSHRREV_B32_e64 8, %10, implicit %exec - FLAT_STORE_DWORD %20, %11, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %12 = V_LSHRREV_B32_e64 %8, %10, implicit %exec - FLAT_STORE_DWORD %20, %12, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %13 = V_LSHR_B32_e64 %7, 3, implicit %exec - FLAT_STORE_DWORD %20, %13, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into 
%ir.gep.out) %14 = V_LSHR_B32_e64 7, %32, implicit %exec - FLAT_STORE_DWORD %20, %14, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %15 = V_LSHR_B32_e64 %27, %24, implicit %exec - FLAT_STORE_DWORD %20, %15, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %22 = V_LSHR_B32_e64 %6, 4, implicit %exec - FLAT_STORE_DWORD %20, %22, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %23 = V_LSHR_B32_e64 %6, %33, implicit %exec - FLAT_STORE_DWORD %20, %23, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %25 = V_LSHR_B32_e32 %34, %34, implicit %exec - FLAT_STORE_DWORD %20, %25, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %26 = V_LSHRREV_B32_e32 11, %10, implicit %exec - FLAT_STORE_DWORD %20, %26, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %28 = V_LSHR_B32_e32 %27, %35, implicit %exec - FLAT_STORE_DWORD %20, %28, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) S_ENDPGM diff --git a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir index ff9fcd1c693f..c6fe6debd225 100644 --- a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir +++ b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir @@ -246,15 +246,15 @@ body: | S_BRANCH %bb.1 bb.1: - FLAT_STORE_DWORDX2 %vgpr0_vgpr1, %vgpr2_vgpr3, 0, 0, implicit %exec, implicit %flat_scr + FLAT_STORE_DWORDX2 %vgpr0_vgpr1, %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr %vgpr3 = V_MOV_B32_e32 0, implicit %exec - FLAT_STORE_DWORDX3 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4, 0, 0, implicit %exec, implicit %flat_scr + FLAT_STORE_DWORDX3 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr %vgpr3 = V_MOV_B32_e32 0, implicit %exec - FLAT_STORE_DWORDX4 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr + FLAT_STORE_DWORDX4 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr %vgpr3 = V_MOV_B32_e32 0, implicit %exec - FLAT_ATOMIC_CMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, implicit %exec, implicit %flat_scr + FLAT_ATOMIC_CMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr %vgpr3 = V_MOV_B32_e32 0, implicit %exec - FLAT_ATOMIC_FCMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, implicit %exec, implicit %flat_scr + FLAT_ATOMIC_FCMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr %vgpr3 = V_MOV_B32_e32 0, implicit %exec S_ENDPGM diff --git a/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir b/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir index 
7d6d8a5891cd..d6b3d7b14cd2 100644 --- a/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir +++ b/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir @@ -57,15 +57,15 @@ body: | %4.sub1 = COPY %3.sub0 undef %5.sub0 = COPY %4.sub1 %5.sub1 = COPY %4.sub0 - FLAT_STORE_DWORDX2 %vgpr0_vgpr1, killed %5, 0, 0, implicit %exec, implicit %flat_scr + FLAT_STORE_DWORDX2 %vgpr0_vgpr1, killed %5, 0, 0, 0, implicit %exec, implicit %flat_scr %6 = IMPLICIT_DEF undef %7.sub0_sub1 = COPY %6 %7.sub2 = COPY %3.sub0 - FLAT_STORE_DWORDX3 %vgpr0_vgpr1, killed %7, 0, 0, implicit %exec, implicit %flat_scr + FLAT_STORE_DWORDX3 %vgpr0_vgpr1, killed %7, 0, 0, 0, implicit %exec, implicit %flat_scr %8 = IMPLICIT_DEF undef %9.sub0_sub1_sub2 = COPY %8 %9.sub3 = COPY %3.sub0 - FLAT_STORE_DWORDX4 %vgpr0_vgpr1, killed %9, 0, 0, implicit %exec, implicit %flat_scr + FLAT_STORE_DWORDX4 %vgpr0_vgpr1, killed %9, 0, 0, 0, implicit %exec, implicit %flat_scr ... diff --git a/llvm/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir b/llvm/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir index 1a0d68d81f97..31024277871d 100644 --- a/llvm/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir +++ b/llvm/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir @@ -58,12 +58,12 @@ body: | bb.3: %1 = COPY killed %17 - FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, implicit %exec, implicit %flat_scr + FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, 0, implicit %exec, implicit %flat_scr %14 = COPY %1.sub1 %16 = COPY killed %1.sub0 undef %15.sub0 = COPY killed %16 %15.sub1 = COPY killed %14 - FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, implicit %exec, implicit %flat_scr + FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, 0, implicit %exec, implicit %flat_scr S_ENDPGM ... 
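The MIR test updates in the surrounding files are mechanical: every FLAT_LOAD_*, FLAT_STORE_*, and FLAT_ATOMIC_* instruction now carries one extra immediate operand (the new offset) immediately after its address/data registers, ahead of glc/slc. As a reference for code that builds these instructions by hand, here is a minimal sketch of the post-patch operand order, modeled on the BuildMI calls in the AMDGPUInstructionSelector.cpp hunks above; the helper name and register arguments are illustrative only, and it assumes the backend's usual in-tree headers.

// Sketch only: FLAT memory instructions now take vaddr[, vdata], offset, glc,
// slc in that order (exec and flat_scr remain implicit operands).
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

// Hypothetical helper, not part of the patch.
static MachineInstr *emitFlatStoreDword(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator I,
                                        const DebugLoc &DL,
                                        const SIInstrInfo &TII,
                                        unsigned VAddrReg, unsigned DataReg) {
  return BuildMI(MBB, I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD))
             .addReg(VAddrReg) // vaddr (64-bit VGPR pair)
             .addReg(DataReg)  // vdata
             .addImm(0)        // offset - the operand added by this patch
             .addImm(0)        // glc
             .addImm(0);       // slc
}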
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir b/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir index cd0d410368c7..ba937c927c70 100644 --- a/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir +++ b/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir @@ -214,26 +214,26 @@ body: | %15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead %scc, implicit %scc %16 = REG_SEQUENCE %14, 1, %15, 2 %18 = COPY %16 - %17 = FLAT_LOAD_DWORD %18, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.uglygep45) + %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.uglygep45) %60 = V_BFE_U32 %17, 8, 8, implicit %exec %61 = V_LSHLREV_B32_e32 2, killed %60, implicit %exec %70 = V_ADD_I32_e32 %7.sub0, %61, implicit-def %vcc, implicit %exec %66 = COPY %13 %65 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec %67 = REG_SEQUENCE %70, 1, killed %65, 2 - FLAT_STORE_DWORD %67, %30, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp9) + FLAT_STORE_DWORD %67, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp9) %37 = S_ADD_U32 %14, 4, implicit-def %scc %38 = S_ADDC_U32 %15, 0, implicit-def dead %scc, implicit %scc %71 = COPY killed %37 %72 = COPY killed %38 %41 = REG_SEQUENCE killed %71, 1, killed %72, 2 - %40 = FLAT_LOAD_DWORD killed %41, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.scevgep) + %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.scevgep) %73 = V_BFE_U32 %40, 8, 8, implicit %exec %74 = V_LSHLREV_B32_e32 2, killed %73, implicit %exec %83 = V_ADD_I32_e32 %7.sub0, %74, implicit-def %vcc, implicit %exec %78 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec %80 = REG_SEQUENCE %83, 1, killed %78, 2 - FLAT_STORE_DWORD %80, %30, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp17) + FLAT_STORE_DWORD %80, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp17) %55 = S_ADD_U32 %0.sub0, 8, implicit-def %scc %56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead %scc, implicit %scc %57 = REG_SEQUENCE %55, 1, killed %56, 2 @@ -377,26 +377,26 @@ body: | %15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead %scc, implicit %scc %16 = REG_SEQUENCE %14, 1, %15, 2 %18 = COPY %16 - %17 = FLAT_LOAD_DWORD %18, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.uglygep45) + %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.uglygep45) %60 = V_BFE_U32 %17, 8, 8, implicit %exec %61 = V_LSHLREV_B32_e32 %84, killed %60, implicit %exec %70 = V_ADD_I32_e32 %7.sub0, %61, implicit-def %vcc, implicit %exec %66 = COPY %13 %65 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec %67 = REG_SEQUENCE %70, 1, killed %65, 2 - FLAT_STORE_DWORD %67, %30, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp9) + FLAT_STORE_DWORD %67, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp9) %37 = S_ADD_U32 %14, 4, implicit-def %scc %38 = S_ADDC_U32 %15, 0, implicit-def dead %scc, implicit %scc %71 = COPY killed %37 %72 = COPY killed %38 %41 = REG_SEQUENCE killed %71, 1, killed %72, 2 - %40 = FLAT_LOAD_DWORD killed %41, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.scevgep) + %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.scevgep) %73 = V_BFE_U32 %40, 8, 8, implicit %exec %74 = V_LSHLREV_B32_e32 %84, killed %73, implicit %exec %83 = V_ADD_I32_e32 %7.sub0, %74, 
implicit-def %vcc, implicit %exec %78 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec %80 = REG_SEQUENCE %83, 1, killed %78, 2 - FLAT_STORE_DWORD %80, %30, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp17) + FLAT_STORE_DWORD %80, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp17) %55 = S_ADD_U32 %0.sub0, 8, implicit-def %scc %56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead %scc, implicit %scc %57 = REG_SEQUENCE %55, 1, killed %56, 2 diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt.mir b/llvm/test/CodeGen/AMDGPU/waitcnt.mir index f754415dccb4..38662e83b359 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt.mir @@ -51,21 +51,21 @@ name: flat_zero_waitcnt body: | bb.0: successors: %bb.1 - %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.global4) - %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16) + %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.global4) + %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16) %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec S_BRANCH %bb.1 bb.1: successors: %bb.2 - %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr - %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16) + %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16) %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec S_BRANCH %bb.2 bb.2: - %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.flat4) - %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.flat16) + %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.flat4) + %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.flat16) %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec S_ENDPGM ... @@ -86,11 +86,11 @@ name: single_fallthrough_successor_no_end_block_wait body: | bb.0: successors: %bb.1 - %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr + %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr bb.1: %vgpr3_vgpr4 = V_LSHLREV_B64 4, %vgpr7_vgpr8, implicit %exec - FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, implicit %exec, implicit %flat_scr + FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr S_ENDPGM ... 
--- @@ -114,15 +114,15 @@ name: single_branch_successor_not_next_block body: | bb.0: successors: %bb.2 - %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr + %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr S_BRANCH %bb.2 bb.1: - FLAT_STORE_DWORD %vgpr8_vgpr9, %vgpr10, 0, 0, implicit %exec, implicit %flat_scr + FLAT_STORE_DWORD %vgpr8_vgpr9, %vgpr10, 0, 0, 0, implicit %exec, implicit %flat_scr S_ENDPGM bb.2: %vgpr3_vgpr4 = V_LSHLREV_B64 4, %vgpr7_vgpr8, implicit %exec - FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, implicit %exec, implicit %flat_scr + FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr S_ENDPGM ... diff --git a/llvm/test/MC/AMDGPU/flat-gfx9.s b/llvm/test/MC/AMDGPU/flat-gfx9.s new file mode 100644 index 000000000000..5f93a7371b8b --- /dev/null +++ b/llvm/test/MC/AMDGPU/flat-gfx9.s @@ -0,0 +1,40 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=VI -check-prefix=GCN %s + +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR %s + + +flat_load_dword v1, v[3:4] offset:0 +// GCN: flat_load_dword v1, v[3:4] ; encoding: [0x00,0x00,0x50,0xdc,0x03,0x00,0x00,0x01] + +flat_load_dword v1, v[3:4] offset:-1 +// GCN-ERR: :35: error: failed parsing operand. + +// FIXME: Error on VI in wrong column +flat_load_dword v1, v[3:4] offset:4095 +// GFX9: flat_load_dword v1, v[3:4] offset:4095 ; encoding: [0xff,0x0f,0x50,0xdc,0x03,0x00,0x00,0x01] +// VIERR: :1: error: invalid operand for instruction + +flat_load_dword v1, v[3:4] offset:4096 +// GCNERR: :28: error: invalid operand for instruction + +flat_load_dword v1, v[3:4] offset:4 glc +// GFX9: flat_load_dword v1, v[3:4] offset:4 glc ; encoding: [0x04,0x00,0x51,0xdc,0x03,0x00,0x00,0x01] +// VIERR: :1: error: invalid operand for instruction + +flat_load_dword v1, v[3:4] offset:4 glc slc +// GFX9: flat_load_dword v1, v[3:4] offset:4 glc slc ; encoding: [0x04,0x00,0x53,0xdc,0x03,0x00,0x00,0x01] +// VIERR: :1: error: invalid operand for instruction + +flat_atomic_add v[3:4], v5 offset:8 slc +// GFX9: flat_atomic_add v[3:4], v5 offset:8 slc ; encoding: [0x08,0x00,0x0a,0xdd,0x03,0x05,0x00,0x00] +// VIERR: :1: error: invalid operand for instruction + +flat_atomic_swap v[3:4], v5 offset:16 +// GFX9: flat_atomic_swap v[3:4], v5 offset:16 ; encoding: [0x10,0x00,0x00,0xdd,0x03,0x05,0x00,0x00] +// VIERR: :1: error: invalid operand for instruction + +flat_store_dword v[3:4], v1 offset:16 +// GFX9: flat_store_dword v[3:4], v1 offset:16 ; encoding: [0x10,0x00,0x70,0xdc,0x03,0x01,0x00,0x00] +// VIERR: :1: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/flat.s b/llvm/test/MC/AMDGPU/flat.s index 4e81799fe9f9..d8cad131d1e4 100644 --- a/llvm/test/MC/AMDGPU/flat.s +++ b/llvm/test/MC/AMDGPU/flat.s @@ -49,9 +49,10 @@ flat_store_dword v[3:4], v1 slc // FIXME: For atomic instructions, glc must be placed immediately following // the data regiser. 
These forms aren't currently supported: +// FIXME: offset:0 required // flat_atomic_add v1, v[3:4], v5 slc glc -flat_atomic_add v1 v[3:4], v5 glc slc +flat_atomic_add v1, v[3:4], v5 offset:0 glc slc // NOSI: error: // CI: flat_atomic_add v1, v[3:4], v5 glc slc ; encoding: [0x00,0x00,0xcb,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_add v1, v[3:4], v5 glc slc ; encoding: [0x00,0x00,0x0b,0xdd,0x03,0x05,0x00,0x01]
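Taken together, the new offset_u12/offset_s13 operand classes, the isOffsetU12/isOffsetS13 predicates, and the checkTargetMatchPredicate change encode a simple legality rule for the new field: targets without FeatureFlatInstOffsets accept only offset 0, GFX9 flat accesses take a 12-bit unsigned offset (the top bit of the 13-bit field is ignored), and the signed 13-bit range is provisioned for the segment (global/scratch) forms. A minimal stand-alone sketch of that rule follows; the helper name and its flag parameters are illustrative, not part of the patch, and only isInt/isUInt from llvm/Support/MathExtras.h are assumed.

// Sketch only: mirrors the range checks behind isOffsetU12/isOffsetS13 and the
// pre-GFX9 "offset must be 0" check added to checkTargetMatchPredicate.
#include "llvm/Support/MathExtras.h"

// Hypothetical helper, not part of the patch.
static bool isLegalFlatInstOffset(int64_t Offset, bool HasFlatInstOffsets,
                                  bool IsSignedSegmentOffset) {
  if (!HasFlatInstOffsets)
    return Offset == 0;               // SI/CI/VI: no usable offset field.
  if (IsSignedSegmentOffset)
    return llvm::isInt<13>(Offset);   // global/scratch: signed 13-bit field.
  return llvm::isUInt<12>(Offset);    // flat: 12-bit unsigned.
}

The VI error checks in flat-gfx9.s above exercise the first branch of this rule: on a target without FeatureFlatInstOffsets, any non-zero offset is rejected as an invalid operand.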