AMDGPU: Start adding offset fields to flat instructions
llvm-svn: 305194
commit fd02314113 (parent 14d61436c0)
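The short version: flat memory instructions gain an immediate offset operand. Where FeatureFlatInstOffsets is available (GFX9) the encoding carries a 13-bit signed offset; the parser-side operands added here otherwise accept a 12-bit unsigned value. Instruction selection still always emits offset 0, and the assembler rejects a nonzero offset on subtargets without the feature. A minimal standalone sketch of the two ranges the new operand classes enforce (the helper name is illustrative, not part of the patch):

  #include <cstdint>

  // Mirrors the isOffsetU12/isOffsetS13 predicates added in the AsmParser hunk.
  bool isLegalFlatOffset(int64_t Offset, bool HasSignedOffsets) {
    if (HasSignedOffsets)
      return Offset >= -4096 && Offset <= 4095; // isInt<13>, GFX9
    return Offset >= 0 && Offset <= 4095;       // isUInt<12>, pre-GFX9
  }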
@@ -138,7 +138,8 @@ private:
   bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                    SDValue &ImmOffset, SDValue &VOffset) const;
 
-  bool SelectFlat(SDValue Addr, SDValue &VAddr, SDValue &SLC) const;
+  bool SelectFlat(SDValue Addr, SDValue &VAddr,
+                  SDValue &Offset, SDValue &SLC) const;
 
   bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                         bool &Imm) const;
@@ -1315,8 +1316,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
 
 bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
                                     SDValue &VAddr,
+                                    SDValue &Offset,
                                     SDValue &SLC) const {
   VAddr = Addr;
+  Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i16);
   SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
   return true;
 }
 
@@ -126,8 +126,9 @@ bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
   MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD))
           .add(I.getOperand(1))
           .add(I.getOperand(0))
-          .addImm(0)
-          .addImm(0);
+          .addImm(0) // offset
+          .addImm(0) // glc
+          .addImm(0); // slc
 
   // Now that we selected an opcode, we need to constrain the register
@@ -392,8 +393,9 @@ bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
   MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
           .add(I.getOperand(0))
           .addReg(PtrReg)
-          .addImm(0)
-          .addImm(0);
+          .addImm(0) // offset
+          .addImm(0) // glc
+          .addImm(0); // slc
 
   bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
   I.eraseFromParent();
@@ -285,6 +285,9 @@ public:
   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
+
+  bool isOffsetU12() const { return isImmTy(ImmTyOffset) && isUInt<12>(getImm()); }
+  bool isOffsetS13() const { return isImmTy(ImmTyOffset) && isInt<13>(getImm()); }
   bool isGDS() const { return isImmTy(ImmTyGDS); }
   bool isGLC() const { return isImmTy(ImmTyGLC); }
   bool isSLC() const { return isImmTy(ImmTySLC); }
@@ -886,6 +889,10 @@ public:
     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
   }
 
+  bool hasFlatOffsets() const {
+    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
+  }
+
   bool hasSGPR102_SGPR103() const {
     return !isVI();
   }
@@ -1034,6 +1041,8 @@ public:
   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
+  AMDGPUOperand::Ptr defaultOffsetU12() const;
+  AMDGPUOperand::Ptr defaultOffsetS13() const;
 
   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
 
@@ -1970,6 +1979,15 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
     }
   }
 
+  if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
+    // FIXME: Produces error without correct column reported.
+    auto OpNum =
+      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
+    const auto &Op = Inst.getOperand(OpNum);
+    if (Op.getImm() != 0)
+      return Match_InvalidOperand;
+  }
+
   return Match_Success;
 }
 
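This predicate is what makes offsets a hard error on targets without FeatureFlatInstOffsets: the operand still parses, but a nonzero value fails instruction matching (the new flat-gfx9.s test below exercises exactly this on tonga). Restated as a dependency-free sketch, with stand-in types since the real code queries an MCInst:

  #include <cstdint>

  // Illustrative stand-ins for the MCInst query in the hunk above.
  struct InstView { bool IsFlat; int64_t OffsetImm; };

  enum MatchResult { Match_Success, Match_InvalidOperand };

  MatchResult checkFlatOffset(const InstView &Inst, bool HasFlatOffsets) {
    if (Inst.IsFlat && !HasFlatOffsets && Inst.OffsetImm != 0)
      return Match_InvalidOperand; // e.g. "offset:8" assembled for tonga
    return Match_Success;
  }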
@@ -3849,6 +3867,14 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
 }
 
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
+}
+
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
+}
+
 //===----------------------------------------------------------------------===//
 // vop3
 //===----------------------------------------------------------------------===//
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-def FLATAtomic : ComplexPattern<i64, 2, "SelectFlat">;
+def FLATAtomic : ComplexPattern<i64, 3, "SelectFlat">;
 
 //===----------------------------------------------------------------------===//
 // FLAT classes
@@ -55,6 +55,8 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
   // copy relevant pseudo op flags
   let SubtargetPredicate = ps.SubtargetPredicate;
   let AsmMatchConverter = ps.AsmMatchConverter;
+  let TSFlags = ps.TSFlags;
+  let UseNamedOperandTable = ps.UseNamedOperandTable;
 
   // encoding fields
   bits<8> vaddr;
@@ -63,10 +65,23 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
   bits<1> slc;
   bits<1> glc;
 
+  // Only valid on gfx9
+  bits<1> lds = 0; // XXX - What does this actually do?
+  bits<2> seg; // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
+
+  // Signed offset. Highest bit ignored for flat and treated as 12-bit
+  // unsigned for flat accesses.
+  bits<13> offset;
+  bits<1> nv = 0; // XXX - What does this actually do?
+
   // We don't use tfe right now, and it was removed in gfx9.
   bits<1> tfe = 0;
 
-  // 15-0 is reserved.
+  // Only valid on GFX9+
+  let Inst{12-0} = offset;
+  let Inst{13} = lds;
+  let Inst{15-14} = 0;
+
   let Inst{16} = !if(ps.has_glc, glc, ps.glcValue);
   let Inst{17} = slc;
   let Inst{24-18} = op;
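The previously reserved low 16 bits of the encoding now hold the offset (bits 12-0) and the new lds flag (bit 13), with bits 15-14 still zero and glc/slc unchanged at bits 16 and 17. A standalone illustration of the packing (the helper is hypothetical, not part of the patch):

  #include <cstdint>

  // Packs the low 16 bits of a flat instruction word as laid out above.
  uint32_t packFlatLow16(int16_t Offset, bool Lds) {
    uint32_t Inst = uint32_t(Offset) & 0x1fff; // Inst{12-0}  = offset
    Inst |= uint32_t(Lds) << 13;               // Inst{13}    = lds
    return Inst;                               // Inst{15-14} = 0 (reserved)
  }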
@@ -74,24 +89,30 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
   let Inst{39-32} = vaddr;
   let Inst{47-40} = !if(ps.has_data, vdata, ?);
   // 54-48 is reserved.
-  let Inst{55} = tfe;
+  let Inst{55} = nv; // nv on GFX9+, TFE before.
   let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
 }
 
-class FLAT_Load_Pseudo <string opName, RegisterClass regClass> : FLAT_Pseudo<
+class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
+  bit HasSignedOffset = 0> : FLAT_Pseudo<
   opName,
   (outs regClass:$vdst),
-  (ins VReg_64:$vaddr, GLC:$glc, slc:$slc),
-  " $vdst, $vaddr$glc$slc"> {
+  !if(HasSignedOffset,
+    (ins VReg_64:$vaddr, offset_s13:$offset, GLC:$glc, slc:$slc),
+    (ins VReg_64:$vaddr, offset_u12:$offset, GLC:$glc, slc:$slc)),
+  " $vdst, $vaddr$offset$glc$slc"> {
   let has_data = 0;
   let mayLoad = 1;
 }
 
-class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass> : FLAT_Pseudo<
+class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
+  bit HasSignedOffset = 0> : FLAT_Pseudo<
   opName,
   (outs),
-  (ins VReg_64:$vaddr, vdataClass:$vdata, GLC:$glc, slc:$slc),
-  " $vaddr, $vdata$glc$slc"> {
+  !if(HasSignedOffset,
+    (ins VReg_64:$vaddr, vdataClass:$vdata, offset_s13:$offset, GLC:$glc, slc:$slc),
+    (ins VReg_64:$vaddr, vdataClass:$vdata, offset_u12:$offset, GLC:$glc, slc:$slc)),
+  " $vaddr, $vdata$offset$glc$slc"> {
   let mayLoad = 0;
   let mayStore = 1;
   let has_vdst = 0;
@@ -103,12 +124,15 @@ multiclass FLAT_Atomic_Pseudo<
   ValueType vt,
   SDPatternOperator atomic = null_frag,
   ValueType data_vt = vt,
-  RegisterClass data_rc = vdst_rc> {
+  RegisterClass data_rc = vdst_rc,
+  bit HasSignedOffset = 0> {
 
   def "" : FLAT_Pseudo <opName,
     (outs),
-    (ins VReg_64:$vaddr, data_rc:$vdata, slc:$slc),
-    " $vaddr, $vdata$slc",
+    !if(HasSignedOffset,
+      (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, slc:$slc),
+      (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, slc:$slc)),
+    " $vaddr, $vdata$offset$slc",
     []>,
     AtomicNoRet <NAME, 0> {
     let mayLoad = 1;
@@ -121,10 +145,12 @@ multiclass FLAT_Atomic_Pseudo<
 
   def _RTN : FLAT_Pseudo <opName,
     (outs vdst_rc:$vdst),
-    (ins VReg_64:$vaddr, data_rc:$vdata, slc:$slc),
-    " $vdst, $vaddr, $vdata glc$slc",
+    !if(HasSignedOffset,
+      (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, slc:$slc),
+      (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, slc:$slc)),
+    " $vdst, $vaddr, $vdata$offset glc$slc",
     [(set vt:$vdst,
-      (atomic (FLATAtomic i64:$vaddr, i1:$slc), data_vt:$vdata))]>,
+      (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
     AtomicNoRet <NAME, 1> {
     let mayLoad = 1;
     let mayStore = 1;
@@ -313,30 +339,30 @@ def flat_truncstorei16 : flat_st <truncstorei16>;
 // Patterns for global loads with no offset.
 class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
   (vt (node i64:$addr)),
-  (inst $addr, 0, 0)
+  (inst $addr, 0, 0, 0)
 >;
 
 class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
   (vt (node i64:$addr)),
-  (inst $addr, 1, 0)
+  (inst $addr, 0, 1, 0)
 >;
 
 class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
   (node vt:$data, i64:$addr),
-  (inst $addr, $data, 0, 0)
+  (inst $addr, $data, 0, 0, 0)
 >;
 
 class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
   // atomic store follows atomic binop convention so the address comes
   // first.
   (node i64:$addr, vt:$data),
-  (inst $addr, $data, 1, 0)
+  (inst $addr, $data, 0, 1, 0)
 >;
 
 class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                      ValueType data_vt = vt> : Pat <
   (vt (node i64:$addr, data_vt:$data)),
-  (inst $addr, $data, 0)
+  (inst $addr, $data, 0, 0)
 >;
 
 let Predicates = [isCIVI] in {
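Each selection pattern grows one leading zero immediate because every flat instruction's operand list now carries the offset ahead of glc/slc; this is also why all the FLAT_* lines in the MIR tests below pick up an extra ", 0". The resulting operand order, spelled out (illustrative enums, not LLVM definitions):

  // Flat load:  (inst $vaddr,         offset, glc, slc)
  // Flat store: (inst $vaddr, $vdata, offset, glc, slc)
  enum FlatLoadOperand  { LdVaddr, LdOffset, LdGlc, LdSlc };
  enum FlatStoreOperand { StVaddr, StVdata, StOffset, StGlc, StSlc };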
@@ -492,11 +492,21 @@ class NamedOperandU8<string Name, AsmOperandClass MatchClass> : Operand<i8> {
   let ParserMatchClass = MatchClass;
 }
 
+class NamedOperandU12<string Name, AsmOperandClass MatchClass> : Operand<i16> {
+  let PrintMethod = "print"#Name;
+  let ParserMatchClass = MatchClass;
+}
+
 class NamedOperandU16<string Name, AsmOperandClass MatchClass> : Operand<i16> {
   let PrintMethod = "print"#Name;
   let ParserMatchClass = MatchClass;
 }
 
+class NamedOperandS13<string Name, AsmOperandClass MatchClass> : Operand<i16> {
+  let PrintMethod = "print"#Name;
+  let ParserMatchClass = MatchClass;
+}
+
 class NamedOperandU32<string Name, AsmOperandClass MatchClass> : Operand<i32> {
   let PrintMethod = "print"#Name;
   let ParserMatchClass = MatchClass;
@@ -514,6 +524,8 @@ def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>;
 def idxen : NamedOperandBit<"Idxen", NamedMatchClass<"Idxen">>;
 def addr64 : NamedOperandBit<"Addr64", NamedMatchClass<"Addr64">>;
 
+def offset_u12 : NamedOperandU12<"Offset", NamedMatchClass<"OffsetU12">>;
+def offset_s13 : NamedOperandS13<"Offset", NamedMatchClass<"OffsetS13">>;
 def offset : NamedOperandU16<"Offset", NamedMatchClass<"Offset">>;
 def offset0 : NamedOperandU8<"Offset0", NamedMatchClass<"Offset0">>;
 def offset1 : NamedOperandU8<"Offset1", NamedMatchClass<"Offset1">>;
@@ -14,7 +14,7 @@ regBankSelected: true
 
 # GCN: global_addrspace
 # GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1
-# GCN: FLAT_LOAD_DWORD [[PTR]], 0, 0
+# GCN: FLAT_LOAD_DWORD [[PTR]], 0, 0, 0
 
 body: |
   bb.0:
@@ -15,7 +15,7 @@ regBankSelected: true
 # GCN: global_addrspace
 # GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1
 # GCN: [[VAL:%[0-9]+]] = COPY %vgpr2
-# GCN: FLAT_STORE_DWORD [[PTR]], [[VAL]], 0, 0
+# GCN: FLAT_STORE_DWORD [[PTR]], [[VAL]], 0, 0, 0
 
 body: |
   bb.0:
@@ -219,19 +219,19 @@ body: |
     %34 = V_MOV_B32_e32 63, implicit %exec
 
     %27 = V_AND_B32_e64 %26, %24, implicit %exec
-    FLAT_STORE_DWORD %37, %27, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %37, %27, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %28 = V_AND_B32_e64 %24, %26, implicit %exec
-    FLAT_STORE_DWORD %37, %28, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %37, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %29 = V_AND_B32_e32 %26, %24, implicit %exec
-    FLAT_STORE_DWORD %37, %29, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %37, %29, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %30 = V_AND_B32_e64 %26, %26, implicit %exec
-    FLAT_STORE_DWORD %37, %30, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %37, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %31 = V_AND_B32_e64 %34, %34, implicit %exec
-    FLAT_STORE_DWORD %37, %31, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %37, %31, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     S_ENDPGM
 
@@ -407,34 +407,34 @@ body: |
     %27 = S_MOV_B32 -4
 
     %11 = V_LSHLREV_B32_e64 12, %10, implicit %exec
-    FLAT_STORE_DWORD %20, %11, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %12 = V_LSHLREV_B32_e64 %7, 12, implicit %exec
-    FLAT_STORE_DWORD %20, %12, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %13 = V_LSHL_B32_e64 %7, 12, implicit %exec
-    FLAT_STORE_DWORD %20, %13, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %14 = V_LSHL_B32_e64 12, %7, implicit %exec
-    FLAT_STORE_DWORD %20, %14, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %15 = V_LSHL_B32_e64 12, %24, implicit %exec
-    FLAT_STORE_DWORD %20, %15, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %22 = V_LSHL_B32_e64 %6, 12, implicit %exec
-    FLAT_STORE_DWORD %20, %22, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %23 = V_LSHL_B32_e64 %6, 32, implicit %exec
-    FLAT_STORE_DWORD %20, %23, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %25 = V_LSHL_B32_e32 %6, %6, implicit %exec
-    FLAT_STORE_DWORD %20, %25, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %26 = V_LSHLREV_B32_e32 11, %24, implicit %exec
-    FLAT_STORE_DWORD %20, %26, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %28 = V_LSHL_B32_e32 %27, %6, implicit %exec
-    FLAT_STORE_DWORD %20, %28, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     S_ENDPGM
 
@@ -615,34 +615,34 @@ body: |
     %35 = V_MOV_B32_e32 2, implicit %exec
 
     %11 = V_ASHRREV_I32_e64 8, %10, implicit %exec
-    FLAT_STORE_DWORD %20, %11, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %12 = V_ASHRREV_I32_e64 %8, %10, implicit %exec
-    FLAT_STORE_DWORD %20, %12, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %13 = V_ASHR_I32_e64 %7, 3, implicit %exec
-    FLAT_STORE_DWORD %20, %13, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %14 = V_ASHR_I32_e64 7, %32, implicit %exec
-    FLAT_STORE_DWORD %20, %14, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %15 = V_ASHR_I32_e64 %27, %24, implicit %exec
-    FLAT_STORE_DWORD %20, %15, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %22 = V_ASHR_I32_e64 %6, 4, implicit %exec
-    FLAT_STORE_DWORD %20, %22, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %23 = V_ASHR_I32_e64 %6, %33, implicit %exec
-    FLAT_STORE_DWORD %20, %23, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %25 = V_ASHR_I32_e32 %34, %34, implicit %exec
-    FLAT_STORE_DWORD %20, %25, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %26 = V_ASHRREV_I32_e32 11, %10, implicit %exec
-    FLAT_STORE_DWORD %20, %26, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %28 = V_ASHR_I32_e32 %27, %35, implicit %exec
-    FLAT_STORE_DWORD %20, %28, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     S_ENDPGM
 
@@ -824,34 +824,34 @@ body: |
     %35 = V_MOV_B32_e32 2, implicit %exec
 
     %11 = V_LSHRREV_B32_e64 8, %10, implicit %exec
-    FLAT_STORE_DWORD %20, %11, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %12 = V_LSHRREV_B32_e64 %8, %10, implicit %exec
-    FLAT_STORE_DWORD %20, %12, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %13 = V_LSHR_B32_e64 %7, 3, implicit %exec
-    FLAT_STORE_DWORD %20, %13, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %14 = V_LSHR_B32_e64 7, %32, implicit %exec
-    FLAT_STORE_DWORD %20, %14, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %15 = V_LSHR_B32_e64 %27, %24, implicit %exec
-    FLAT_STORE_DWORD %20, %15, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %22 = V_LSHR_B32_e64 %6, 4, implicit %exec
-    FLAT_STORE_DWORD %20, %22, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %23 = V_LSHR_B32_e64 %6, %33, implicit %exec
-    FLAT_STORE_DWORD %20, %23, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %25 = V_LSHR_B32_e32 %34, %34, implicit %exec
-    FLAT_STORE_DWORD %20, %25, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %26 = V_LSHRREV_B32_e32 11, %10, implicit %exec
-    FLAT_STORE_DWORD %20, %26, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %28 = V_LSHR_B32_e32 %27, %35, implicit %exec
-    FLAT_STORE_DWORD %20, %28, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     S_ENDPGM
 
@@ -246,15 +246,15 @@ body: |
     S_BRANCH %bb.1
 
   bb.1:
-    FLAT_STORE_DWORDX2 %vgpr0_vgpr1, %vgpr2_vgpr3, 0, 0, implicit %exec, implicit %flat_scr
+    FLAT_STORE_DWORDX2 %vgpr0_vgpr1, %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
     %vgpr3 = V_MOV_B32_e32 0, implicit %exec
-    FLAT_STORE_DWORDX3 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4, 0, 0, implicit %exec, implicit %flat_scr
+    FLAT_STORE_DWORDX3 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr
     %vgpr3 = V_MOV_B32_e32 0, implicit %exec
-    FLAT_STORE_DWORDX4 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr
+    FLAT_STORE_DWORDX4 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
     %vgpr3 = V_MOV_B32_e32 0, implicit %exec
-    FLAT_ATOMIC_CMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, implicit %exec, implicit %flat_scr
+    FLAT_ATOMIC_CMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr
     %vgpr3 = V_MOV_B32_e32 0, implicit %exec
-    FLAT_ATOMIC_FCMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, implicit %exec, implicit %flat_scr
+    FLAT_ATOMIC_FCMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr
     %vgpr3 = V_MOV_B32_e32 0, implicit %exec
     S_ENDPGM
 
@@ -57,15 +57,15 @@ body: |
     %4.sub1 = COPY %3.sub0
     undef %5.sub0 = COPY %4.sub1
     %5.sub1 = COPY %4.sub0
-    FLAT_STORE_DWORDX2 %vgpr0_vgpr1, killed %5, 0, 0, implicit %exec, implicit %flat_scr
+    FLAT_STORE_DWORDX2 %vgpr0_vgpr1, killed %5, 0, 0, 0, implicit %exec, implicit %flat_scr
 
     %6 = IMPLICIT_DEF
     undef %7.sub0_sub1 = COPY %6
     %7.sub2 = COPY %3.sub0
-    FLAT_STORE_DWORDX3 %vgpr0_vgpr1, killed %7, 0, 0, implicit %exec, implicit %flat_scr
+    FLAT_STORE_DWORDX3 %vgpr0_vgpr1, killed %7, 0, 0, 0, implicit %exec, implicit %flat_scr
 
     %8 = IMPLICIT_DEF
     undef %9.sub0_sub1_sub2 = COPY %8
     %9.sub3 = COPY %3.sub0
-    FLAT_STORE_DWORDX4 %vgpr0_vgpr1, killed %9, 0, 0, implicit %exec, implicit %flat_scr
+    FLAT_STORE_DWORDX4 %vgpr0_vgpr1, killed %9, 0, 0, 0, implicit %exec, implicit %flat_scr
 ...
@@ -58,12 +58,12 @@ body: |
 
   bb.3:
     %1 = COPY killed %17
-    FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, implicit %exec, implicit %flat_scr
+    FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, 0, implicit %exec, implicit %flat_scr
     %14 = COPY %1.sub1
     %16 = COPY killed %1.sub0
     undef %15.sub0 = COPY killed %16
     %15.sub1 = COPY killed %14
-    FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, implicit %exec, implicit %flat_scr
+    FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, 0, implicit %exec, implicit %flat_scr
     S_ENDPGM
 
 ...
@@ -214,26 +214,26 @@ body: |
     %15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead %scc, implicit %scc
     %16 = REG_SEQUENCE %14, 1, %15, 2
     %18 = COPY %16
-    %17 = FLAT_LOAD_DWORD %18, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.uglygep45)
+    %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.uglygep45)
     %60 = V_BFE_U32 %17, 8, 8, implicit %exec
     %61 = V_LSHLREV_B32_e32 2, killed %60, implicit %exec
     %70 = V_ADD_I32_e32 %7.sub0, %61, implicit-def %vcc, implicit %exec
     %66 = COPY %13
     %65 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec
     %67 = REG_SEQUENCE %70, 1, killed %65, 2
-    FLAT_STORE_DWORD %67, %30, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp9)
+    FLAT_STORE_DWORD %67, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp9)
     %37 = S_ADD_U32 %14, 4, implicit-def %scc
     %38 = S_ADDC_U32 %15, 0, implicit-def dead %scc, implicit %scc
     %71 = COPY killed %37
     %72 = COPY killed %38
     %41 = REG_SEQUENCE killed %71, 1, killed %72, 2
-    %40 = FLAT_LOAD_DWORD killed %41, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.scevgep)
+    %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.scevgep)
     %73 = V_BFE_U32 %40, 8, 8, implicit %exec
     %74 = V_LSHLREV_B32_e32 2, killed %73, implicit %exec
     %83 = V_ADD_I32_e32 %7.sub0, %74, implicit-def %vcc, implicit %exec
     %78 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec
     %80 = REG_SEQUENCE %83, 1, killed %78, 2
-    FLAT_STORE_DWORD %80, %30, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp17)
+    FLAT_STORE_DWORD %80, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp17)
     %55 = S_ADD_U32 %0.sub0, 8, implicit-def %scc
     %56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead %scc, implicit %scc
     %57 = REG_SEQUENCE %55, 1, killed %56, 2
@@ -377,26 +377,26 @@ body: |
     %15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead %scc, implicit %scc
     %16 = REG_SEQUENCE %14, 1, %15, 2
     %18 = COPY %16
-    %17 = FLAT_LOAD_DWORD %18, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.uglygep45)
+    %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.uglygep45)
     %60 = V_BFE_U32 %17, 8, 8, implicit %exec
     %61 = V_LSHLREV_B32_e32 %84, killed %60, implicit %exec
     %70 = V_ADD_I32_e32 %7.sub0, %61, implicit-def %vcc, implicit %exec
     %66 = COPY %13
     %65 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec
     %67 = REG_SEQUENCE %70, 1, killed %65, 2
-    FLAT_STORE_DWORD %67, %30, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp9)
+    FLAT_STORE_DWORD %67, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp9)
     %37 = S_ADD_U32 %14, 4, implicit-def %scc
     %38 = S_ADDC_U32 %15, 0, implicit-def dead %scc, implicit %scc
     %71 = COPY killed %37
     %72 = COPY killed %38
     %41 = REG_SEQUENCE killed %71, 1, killed %72, 2
-    %40 = FLAT_LOAD_DWORD killed %41, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.scevgep)
+    %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.scevgep)
     %73 = V_BFE_U32 %40, 8, 8, implicit %exec
     %74 = V_LSHLREV_B32_e32 %84, killed %73, implicit %exec
     %83 = V_ADD_I32_e32 %7.sub0, %74, implicit-def %vcc, implicit %exec
     %78 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec
     %80 = REG_SEQUENCE %83, 1, killed %78, 2
-    FLAT_STORE_DWORD %80, %30, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp17)
+    FLAT_STORE_DWORD %80, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp17)
     %55 = S_ADD_U32 %0.sub0, 8, implicit-def %scc
     %56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead %scc, implicit %scc
     %57 = REG_SEQUENCE %55, 1, killed %56, 2
@@ -51,21 +51,21 @@ name: flat_zero_waitcnt
 body: |
   bb.0:
     successors: %bb.1
-    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.global4)
-    %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16)
+    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.global4)
+    %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16)
     %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec
     S_BRANCH %bb.1
 
   bb.1:
     successors: %bb.2
-    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr
-    %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16)
+    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr
+    %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16)
     %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec
     S_BRANCH %bb.2
 
  bb.2:
-    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.flat4)
-    %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.flat16)
+    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.flat4)
+    %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.flat16)
     %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec
     S_ENDPGM
 ...
@@ -86,11 +86,11 @@ name: single_fallthrough_successor_no_end_block_wait
 body: |
   bb.0:
     successors: %bb.1
-    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr
+    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr
 
   bb.1:
     %vgpr3_vgpr4 = V_LSHLREV_B64 4, %vgpr7_vgpr8, implicit %exec
-    FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, implicit %exec, implicit %flat_scr
+    FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
     S_ENDPGM
 ...
 ---
@@ -114,15 +114,15 @@ name: single_branch_successor_not_next_block
 body: |
   bb.0:
     successors: %bb.2
-    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr
+    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr
     S_BRANCH %bb.2
 
   bb.1:
-    FLAT_STORE_DWORD %vgpr8_vgpr9, %vgpr10, 0, 0, implicit %exec, implicit %flat_scr
+    FLAT_STORE_DWORD %vgpr8_vgpr9, %vgpr10, 0, 0, 0, implicit %exec, implicit %flat_scr
     S_ENDPGM
 
   bb.2:
     %vgpr3_vgpr4 = V_LSHLREV_B64 4, %vgpr7_vgpr8, implicit %exec
-    FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, implicit %exec, implicit %flat_scr
+    FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
     S_ENDPGM
 ...
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
+
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCN-ERR %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCN-ERR %s
+
+
+flat_load_dword v1, v[3:4] offset:0
+// GCN: flat_load_dword v1, v[3:4] ; encoding: [0x00,0x00,0x50,0xdc,0x03,0x00,0x00,0x01]
+
+flat_load_dword v1, v[3:4] offset:-1
+// GCN-ERR: :35: error: failed parsing operand.
+
+// FIXME: Error on VI in wrong column
+flat_load_dword v1, v[3:4] offset:4095
+// GFX9: flat_load_dword v1, v[3:4] offset:4095 ; encoding: [0xff,0x0f,0x50,0xdc,0x03,0x00,0x00,0x01]
+// VI-ERR: :1: error: invalid operand for instruction
+
+flat_load_dword v1, v[3:4] offset:4096
+// GCN-ERR: :28: error: invalid operand for instruction
+
+flat_load_dword v1, v[3:4] offset:4 glc
+// GFX9: flat_load_dword v1, v[3:4] offset:4 glc ; encoding: [0x04,0x00,0x51,0xdc,0x03,0x00,0x00,0x01]
+// VI-ERR: :1: error: invalid operand for instruction
+
+flat_load_dword v1, v[3:4] offset:4 glc slc
+// GFX9: flat_load_dword v1, v[3:4] offset:4 glc slc ; encoding: [0x04,0x00,0x53,0xdc,0x03,0x00,0x00,0x01]
+// VI-ERR: :1: error: invalid operand for instruction
+
+flat_atomic_add v[3:4], v5 offset:8 slc
+// GFX9: flat_atomic_add v[3:4], v5 offset:8 slc ; encoding: [0x08,0x00,0x0a,0xdd,0x03,0x05,0x00,0x00]
+// VI-ERR: :1: error: invalid operand for instruction
+
+flat_atomic_swap v[3:4], v5 offset:16
+// GFX9: flat_atomic_swap v[3:4], v5 offset:16 ; encoding: [0x10,0x00,0x00,0xdd,0x03,0x05,0x00,0x00]
+// VI-ERR: :1: error: invalid operand for instruction
+
+flat_store_dword v[3:4], v1 offset:16
+// GFX9: flat_store_dword v[3:4], v1 offset:16 ; encoding: [0x10,0x00,0x70,0xdc,0x03,0x01,0x00,0x00]
+// VI-ERR: :1: error: invalid operand for instruction
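The GFX9 encodings in this new test also show where the field landed: offset:4095 appears as 0xff,0x0f in the first two bytes, i.e. the 13-bit offset in Inst{12-0} emitted little-endian, with glc then flipping bit 16 (0x50 becomes 0x51). A standalone arithmetic check of that claim:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t Field = 4095 & 0x1fff;        // offset occupies Inst{12-0}
    assert((Field & 0xff) == 0xff);        // first encoded byte
    assert(((Field >> 8) & 0xff) == 0x0f); // second encoded byte (bits 12-8)
    return 0;
  }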
@@ -49,9 +49,10 @@ flat_store_dword v[3:4], v1 slc
 
 // FIXME: For atomic instructions, glc must be placed immediately following
 // the data register. These forms aren't currently supported:
+// FIXME: offset:0 required
 // flat_atomic_add v1, v[3:4], v5 slc glc
 
-flat_atomic_add v1, v[3:4], v5 glc slc
+flat_atomic_add v1, v[3:4], v5 offset:0 glc slc
 // NOSI: error:
 // CI: flat_atomic_add v1, v[3:4], v5 glc slc ; encoding: [0x00,0x00,0xcb,0xdc,0x03,0x05,0x00,0x01]
 // VI: flat_atomic_add v1, v[3:4], v5 glc slc ; encoding: [0x00,0x00,0x0b,0xdd,0x03,0x05,0x00,0x01]