forked from OSchip/llvm-project
[AMDGPU][MC][GFX8][GFX9] Corrected names of integer v_{add/addc/sub/subrev/subb/subbrev}
See bug 34765: https://bugs.llvm.org/show_bug.cgi?id=34765
Reviewers: tamazov, SamWot, arsenm, vpykhtin
Differential Revision: https://reviews.llvm.org/D40088
llvm-svn: 318675
This commit is contained in:
parent
60cc1d3218
commit
a0342dc9eb
|
@ -107,14 +107,15 @@ static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
|
|||
|
||||
int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
|
||||
SIEncodingFamily Gen = subtargetEncodingFamily(ST);
|
||||
|
||||
if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
|
||||
ST.getGeneration() >= AMDGPUSubtarget::GFX9)
|
||||
Gen = SIEncodingFamily::GFX9;
|
||||
|
||||
if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
|
||||
Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
|
||||
: SIEncodingFamily::SDWA;
|
||||
|
||||
if ((get(Opcode).TSFlags & SIInstrFlags::F16_ZFILL) != 0 &&
|
||||
ST.getGeneration() >= AMDGPUSubtarget::GFX9)
|
||||
Gen = SIEncodingFamily::GFX9;
|
||||
|
||||
int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
|
||||
|
||||
// -1 means that Opcode is already a native instruction.
|
||||
|
|
|
@ -212,6 +212,9 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
|
|||
Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
|
||||
if (Res) break;
|
||||
|
||||
Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
|
||||
if (Res) break;
|
||||
|
||||
if (Bytes.size() < 4) break;
|
||||
const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
|
||||
Res = tryDecodeInst(DecoderTableVI64, MI, QW, Address);
|
||||
|
|
|
@ -69,7 +69,7 @@ enum : uint64_t {
|
|||
VOPAsmPrefer32Bit = UINT64_C(1) << 41,
|
||||
VOP3_OPSEL = UINT64_C(1) << 42,
|
||||
maybeAtomic = UINT64_C(1) << 43,
|
||||
F16_ZFILL = UINT64_C(1) << 44,
|
||||
renamedInGFX9 = UINT64_C(1) << 44,
|
||||
|
||||
// Is a clamp on FP type.
|
||||
FPClamp = UINT64_C(1) << 45,
|
||||
|
|
|
@ -95,9 +95,9 @@ class InstSI <dag outs, dag ins, string asm = "",
|
|||
// Is it possible for this instruction to be atomic?
|
||||
field bit maybeAtomic = 0;
|
||||
|
||||
// This bit indicates that this is a 16-bit instruction which zero-fills
|
||||
// unused bits in dst. Note that new GFX9 opcodes preserve unused bits.
|
||||
field bit F16_ZFILL = 0;
|
||||
// This bit indicates that this is a VI instruction which is renamed
|
||||
// in GFX9. Required for correct mapping from pseudo to MC.
|
||||
field bit renamedInGFX9 = 0;
|
||||
|
||||
// This bit indicates that this has a floating point result type, so
|
||||
// the clamp modifier has floating point semantics.
|
||||
|
@ -164,7 +164,7 @@ class InstSI <dag outs, dag ins, string asm = "",
|
|||
let TSFlags{42} = VOP3_OPSEL;
|
||||
|
||||
let TSFlags{43} = maybeAtomic;
|
||||
let TSFlags{44} = F16_ZFILL;
|
||||
let TSFlags{44} = renamedInGFX9;
|
||||
|
||||
let TSFlags{45} = FPClamp;
|
||||
let TSFlags{46} = IntClamp;
|
||||
|
|
|
@ -1528,47 +1528,3 @@ def : FP16Med3Pat<f16, V_MED3_F16>;
|
|||
def : Int16Med3Pat<V_MED3_I16, smax, smax_oneuse, smin_oneuse, i16>;
|
||||
def : Int16Med3Pat<V_MED3_U16, umax, umax_oneuse, umin_oneuse, i16>;
|
||||
} // End Predicates = [isGFX9]
|
||||
|
||||
//============================================================================//
|
||||
// Assembler aliases
|
||||
//============================================================================//
|
||||
|
||||
// Emits four anonymous assembler aliases for the mnemonic string Inst, each
// guarded by Requires<[HasAddNoCarryInsts]>:
//   * "Inst $vdst, $src0, $src1"        -> Inst32NC (32-bit, no carry-out)
//   * "Inst $vdst, $src0, $src1"        -> Inst64NC (64-bit, no carry-out)
//   * "Inst $vdst, vcc, $src0, $src1"   -> Inst32CO (32-bit, carry-out form)
//   * "Inst $vdst, $sdst, $src0, $src1" -> Inst64CO (64-bit, carry-out form)
// The trailing integer (1000 for the 32-bit targets, -10 for the 64-bit
// targets) is the last InstAlias template argument.
// NOTE(review): presumably this is the alias emit priority, so that the
// 32-bit encoding is preferred -- confirm against the InstAlias definition.
multiclass NoCarryAlias<string Inst,
|
||||
Instruction Inst32NC, Instruction Inst64NC,
|
||||
Instruction Inst32CO, Instruction Inst64CO> {
|
||||
// Three-operand syntax, 32-bit no-carry target.
def : InstAlias<Inst#" $vdst, $src0, $src1",
|
||||
(Inst32NC VGPR_32:$vdst, VSrc_b32:$src0, VGPR_32:$src1), 1000>,
|
||||
Requires<[HasAddNoCarryInsts]>;
|
||||
|
||||
// Same syntax, 64-bit no-carry target; both sources use VCSrc_b32.
def : InstAlias<Inst#" $vdst, $src0, $src1",
|
||||
(Inst64NC VGPR_32:$vdst, VCSrc_b32:$src0, VCSrc_b32:$src1), -10>,
|
||||
Requires<[HasAddNoCarryInsts]>;
|
||||
|
||||
// Syntax with a literal "vcc" operand, 32-bit carry-out target.
def : InstAlias<Inst#" $vdst, vcc, $src0, $src1",
|
||||
(Inst32CO VGPR_32:$vdst, VSrc_b32:$src0, VGPR_32:$src1), 1000>,
|
||||
Requires<[HasAddNoCarryInsts]>;
|
||||
|
||||
// Syntax with an explicit $sdst (SReg_64) operand, 64-bit carry-out target.
def : InstAlias<Inst#" $vdst, $sdst, $src0, $src1",
|
||||
(Inst64CO VGPR_32:$vdst, SReg_64:$sdst, VSrc_b32:$src0, VGPR_32:$src1), -10>,
|
||||
Requires<[HasAddNoCarryInsts]>;
|
||||
}
|
||||
|
||||
// gfx9 made a mess of add instruction names. The existing add
|
||||
// instructions had _co added to their names, and their old names were
|
||||
// repurposed to a version without carry out.
|
||||
// TODO: Do we need SubtargetPredicates for MnemonicAliases?
|
||||
let Predicates = [HasAddNoCarryInsts] in {
|
||||
defm : NoCarryAlias<"v_add_u32", V_ADD_U32_e32_vi, V_ADD_U32_e64_vi,
|
||||
V_ADD_I32_e32_vi, V_ADD_I32_e64_vi>;
|
||||
defm : NoCarryAlias<"v_sub_u32", V_SUB_U32_e32_vi, V_SUB_U32_e64_vi,
|
||||
V_SUB_I32_e32_vi, V_SUB_I32_e64_vi>;
|
||||
defm : NoCarryAlias<"v_subrev_u32",
|
||||
V_SUBREV_U32_e32_vi, V_SUBREV_U32_e64_vi,
|
||||
V_SUBREV_I32_e32_vi, V_SUBREV_I32_e64_vi>;
|
||||
}
|
||||
|
||||
let Predicates = [NotHasAddNoCarryInsts] in {
|
||||
def : MnemonicAlias<"v_add_u32", "v_add_i32">;
|
||||
def : MnemonicAlias<"v_sub_u32", "v_sub_i32">;
|
||||
def : MnemonicAlias<"v_subrev_u32", "v_subrev_i32">;
|
||||
}
|
||||
|
|
|
@ -143,20 +143,22 @@ multiclass VOP2bInst <string opName,
|
|||
VOPProfile P,
|
||||
SDPatternOperator node = null_frag,
|
||||
string revOp = opName,
|
||||
bit GFX9Renamed = 0,
|
||||
bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
|
||||
let renamedInGFX9 = GFX9Renamed in {
|
||||
let SchedRW = [Write32Bit, WriteSALU] in {
|
||||
let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
|
||||
def _e32 : VOP2_Pseudo <opName, P>,
|
||||
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
|
||||
|
||||
let SchedRW = [Write32Bit, WriteSALU] in {
|
||||
let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
|
||||
def _e32 : VOP2_Pseudo <opName, P>,
|
||||
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
|
||||
|
||||
def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
|
||||
let AsmMatchConverter = "cvtSdwaVOP2b";
|
||||
def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
|
||||
let AsmMatchConverter = "cvtSdwaVOP2b";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
|
||||
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
|
||||
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
|
||||
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -278,13 +280,13 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
|
|||
|
||||
let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
|
||||
Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
|
||||
clampmod:$clamp, omod:$omod,
|
||||
clampmod:$clamp,
|
||||
dst_sel:$dst_sel, dst_unused:$dst_unused,
|
||||
src0_sel:$src0_sel, src1_sel:$src1_sel);
|
||||
|
||||
let InsDPP = (ins DstRCDPP:$old,
|
||||
Src0Mod:$src0_modifiers, Src0DPP:$src0,
|
||||
Src1Mod:$src1_modifiers, Src1DPP:$src1,
|
||||
Src0DPP:$src0,
|
||||
Src1DPP:$src1,
|
||||
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
|
||||
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
|
||||
let HasExt = 1;
|
||||
|
@ -370,12 +372,12 @@ def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, [], "">;
|
|||
|
||||
// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in VI,
|
||||
// but the VI instructions behave the same as the SI versions.
|
||||
defm V_ADD_I32 : VOP2bInst <"v_add_i32", VOP2b_I32_I1_I32_I32>;
|
||||
defm V_SUB_I32 : VOP2bInst <"v_sub_i32", VOP2b_I32_I1_I32_I32>;
|
||||
defm V_SUBREV_I32 : VOP2bInst <"v_subrev_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32">;
|
||||
defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1>;
|
||||
defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1>;
|
||||
defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32">;
|
||||
defm V_ADD_I32 : VOP2bInst <"v_add_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_i32", 1>;
|
||||
defm V_SUB_I32 : VOP2bInst <"v_sub_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32", 1>;
|
||||
defm V_SUBREV_I32 : VOP2bInst <"v_subrev_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32", 1>;
|
||||
defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32", 1>;
|
||||
defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;
|
||||
defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;
|
||||
|
||||
|
||||
let SubtargetPredicate = HasAddNoCarryInsts in {
|
||||
|
@ -660,8 +662,8 @@ defm V_CVT_PK_I16_I32 : VOP2_Real_e32e64_si <0x31>;
|
|||
// VI
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class VOP2_DPP <bits<6> op, VOP2_Pseudo ps, VOPProfile P = ps.Pfl> :
|
||||
VOP_DPP <ps.OpName, P> {
|
||||
class VOP2_DPP <bits<6> op, VOP2_Pseudo ps, string OpName = ps.OpName, VOPProfile P = ps.Pfl> :
|
||||
VOP_DPP <OpName, P> {
|
||||
let Defs = ps.Defs;
|
||||
let Uses = ps.Uses;
|
||||
let SchedRW = ps.SchedRW;
|
||||
|
@ -712,12 +714,6 @@ multiclass VOP2_Real_e64only_vi <bits<10> op> {
|
|||
}
|
||||
}
|
||||
|
||||
// VI real definitions for a VOP2 instruction: inherits the defs of
// VOP2_Real_e32_vi<op> and adds an _e64_vi def built from the pseudo
// NAME#"_e64". The _e64_vi def uses the VOP3be_vi encoding class, and its
// 10-bit VOP3 opcode is the bits {0, 1, 0, 0} followed by the 6-bit VOP2
// opcode `op`.
multiclass Base_VOP2be_Real_e32e64_vi <bits<6> op> : VOP2_Real_e32_vi<op> {
|
||||
def _e64_vi :
|
||||
VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
|
||||
VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
|
||||
}
|
||||
|
||||
multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
|
||||
VOP2_Real_e32_vi<op>,
|
||||
VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;
|
||||
|
@ -736,12 +732,85 @@ multiclass VOP2_SDWA9_Real <bits<6> op> {
|
|||
VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
|
||||
}
|
||||
|
||||
multiclass VOP2be_Real_e32e64_vi <bits<6> op> :
|
||||
Base_VOP2be_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {
|
||||
// For now, DPP is kept only for the assembler/disassembler.
|
||||
// TODO: add corresponding pseudo
|
||||
def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
|
||||
let AssemblerPredicates = [isVIOnly] in {
|
||||
|
||||
// VI real definitions for a VOP2 instruction whose assembly mnemonic differs
// from the name of the pseudo it is built from.
//   op      - 6-bit VOP2 opcode; the _e64 form uses {0, 1, 0, 0, op{5-0}}.
//   OpName  - base name of the existing pseudos; OpName#"_e32", OpName#"_e64"
//             and OpName#"_sdwa" are looked up with !cast.
//   AsmName - mnemonic to print/parse; each def sets
//             AsmString = AsmName # <pseudo>.AsmOperands (the _dpp def passes
//             AsmName to VOP2_DPP instead).
// Produces _e32_vi, _e64_vi, _sdwa_vi and _dpp. Only the _e32_vi and _e64_vi
// defs set DecoderNamespace (to "VI") here.
multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName> {
|
||||
def _e32_vi :
|
||||
VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.VI>,
|
||||
VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> {
|
||||
VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32");
|
||||
// Replace the pseudo's mnemonic with AsmName, keeping its operand string.
let AsmString = AsmName # ps.AsmOperands;
|
||||
let DecoderNamespace = "VI";
|
||||
}
|
||||
def _e64_vi :
|
||||
VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.VI>,
|
||||
VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
|
||||
VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
|
||||
let AsmString = AsmName # ps.AsmOperands;
|
||||
let DecoderNamespace = "VI";
|
||||
}
|
||||
def _sdwa_vi :
|
||||
VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
|
||||
VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
|
||||
VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
|
||||
let AsmString = AsmName # ps.AsmOperands;
|
||||
}
|
||||
// The DPP def is derived from the _e32 pseudo and carries no _vi suffix.
def _dpp :
|
||||
VOP2_DPP<op, !cast<VOP2_Pseudo>(OpName#"_e32"), AsmName>;
|
||||
}
|
||||
}
|
||||
|
||||
let AssemblerPredicates = [isGFX9] in {
|
||||
|
||||
// GFX9 real definitions for a VOP2 instruction whose assembly mnemonic
// differs from the name of the pseudo it is built from.
//   op      - 6-bit VOP2 opcode; the _e64 form uses {0, 1, 0, 0, op{5-0}}.
//   OpName  - base name of the existing pseudos; OpName#"_e32", OpName#"_e64"
//             and OpName#"_sdwa" are looked up with !cast.
//   AsmName - mnemonic to print/parse; each def sets
//             AsmString = AsmName # <pseudo>.AsmOperands (the _dpp_gfx9 def
//             passes AsmName to VOP2_DPP instead).
// Produces _e32_gfx9, _e64_gfx9, _sdwa_gfx9 and _dpp_gfx9. The e32/e64 defs
// are tagged with SIEncodingFamily.GFX9 and DecoderNamespace "GFX9".
multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
|
||||
def _e32_gfx9 :
|
||||
VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.GFX9>,
|
||||
VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> {
|
||||
VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32");
|
||||
// Replace the pseudo's mnemonic with AsmName, keeping its operand string.
let AsmString = AsmName # ps.AsmOperands;
|
||||
let DecoderNamespace = "GFX9";
|
||||
}
|
||||
def _e64_gfx9 :
|
||||
VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.GFX9>,
|
||||
VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
|
||||
VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
|
||||
let AsmString = AsmName # ps.AsmOperands;
|
||||
let DecoderNamespace = "GFX9";
|
||||
}
|
||||
def _sdwa_gfx9 :
|
||||
VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
|
||||
VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
|
||||
VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
|
||||
let AsmString = AsmName # ps.AsmOperands;
|
||||
}
|
||||
def _dpp_gfx9 :
|
||||
VOP2_DPP<op, !cast<VOP2_Pseudo>(OpName#"_e32"), AsmName> {
|
||||
// NOTE(review): the DPP def is placed in the "SDWA9" decoder namespace
// rather than "GFX9" -- confirm this is intentional.
let DecoderNamespace = "SDWA9";
|
||||
}
|
||||
}
|
||||
|
||||
// GFX9 real definitions for a VOP2 instruction that keeps the mnemonic of its
// pseudo. The pseudos NAME#"_e32", NAME#"_e64" and NAME#"_sdwa" are looked up
// with !cast, where NAME is the defm name.
//   op - 6-bit VOP2 opcode; the _e64 form uses {0, 1, 0, 0, op{5-0}} with the
//        VOP3e_vi encoding class.
// Produces _e32_gfx9, _e64_gfx9, _sdwa_gfx9 and _dpp_gfx9. No AsmString is
// overridden in this multiclass.
multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
|
||||
def _e32_gfx9 :
|
||||
VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX9>,
|
||||
VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>{
|
||||
let DecoderNamespace = "GFX9";
|
||||
}
|
||||
def _e64_gfx9 :
|
||||
VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
|
||||
VOP3e_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
|
||||
let DecoderNamespace = "GFX9";
|
||||
}
|
||||
// The SDWA def has an empty body: nothing is overridden here.
def _sdwa_gfx9 :
|
||||
VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
|
||||
VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
|
||||
}
|
||||
def _dpp_gfx9 :
|
||||
VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
|
||||
// NOTE(review): the DPP def is placed in the "SDWA9" decoder namespace
// rather than "GFX9" -- confirm this is intentional.
let DecoderNamespace = "SDWA9";
|
||||
}
|
||||
}
|
||||
|
||||
} // AssemblerPredicates = [isGFX9]
|
||||
|
||||
multiclass VOP2_Real_e32e64_vi <bits<6> op> :
|
||||
Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {
|
||||
|
@ -775,12 +844,24 @@ defm V_XOR_B32 : VOP2_Real_e32e64_vi <0x15>;
|
|||
defm V_MAC_F32 : VOP2_Real_e32e64_vi <0x16>;
|
||||
defm V_MADMK_F32 : VOP2_Real_MADK_vi <0x17>;
|
||||
defm V_MADAK_F32 : VOP2_Real_MADK_vi <0x18>;
|
||||
defm V_ADD_I32 : VOP2be_Real_e32e64_vi <0x19>;
|
||||
defm V_SUB_I32 : VOP2be_Real_e32e64_vi <0x1a>;
|
||||
defm V_SUBREV_I32 : VOP2be_Real_e32e64_vi <0x1b>;
|
||||
defm V_ADDC_U32 : VOP2be_Real_e32e64_vi <0x1c>;
|
||||
defm V_SUBB_U32 : VOP2be_Real_e32e64_vi <0x1d>;
|
||||
defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi <0x1e>;
|
||||
|
||||
defm V_ADD_U32 : VOP2be_Real_e32e64_vi_only <0x19, "V_ADD_I32", "v_add_u32">;
|
||||
defm V_SUB_U32 : VOP2be_Real_e32e64_vi_only <0x1a, "V_SUB_I32", "v_sub_u32">;
|
||||
defm V_SUBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1b, "V_SUBREV_I32", "v_subrev_u32">;
|
||||
defm V_ADDC_U32 : VOP2be_Real_e32e64_vi_only <0x1c, "V_ADDC_U32", "v_addc_u32">;
|
||||
defm V_SUBB_U32 : VOP2be_Real_e32e64_vi_only <0x1d, "V_SUBB_U32", "v_subb_u32">;
|
||||
defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1e, "V_SUBBREV_U32", "v_subbrev_u32">;
|
||||
|
||||
defm V_ADD_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_I32", "v_add_co_u32">;
|
||||
defm V_SUB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_I32", "v_sub_co_u32">;
|
||||
defm V_SUBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_I32", "v_subrev_co_u32">;
|
||||
defm V_ADDC_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1c, "V_ADDC_U32", "v_addc_co_u32">;
|
||||
defm V_SUBB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1d, "V_SUBB_U32", "v_subb_co_u32">;
|
||||
defm V_SUBBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_subbrev_co_u32">;
|
||||
|
||||
defm V_ADD_U32 : VOP2_Real_e32e64_gfx9 <0x34>;
|
||||
defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>;
|
||||
defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>;
|
||||
|
||||
defm V_READLANE_B32 : VOP32_Real_vi <0x289>;
|
||||
defm V_WRITELANE_B32 : VOP32_Real_vi <0x28a>;
|
||||
|
@ -840,9 +921,3 @@ def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
|
|||
def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
|
||||
|
||||
} // End SubtargetPredicate = isVI
|
||||
|
||||
let SubtargetPredicate = HasAddNoCarryInsts in {
|
||||
defm V_ADD_U32 : VOP2_Real_e32e64_vi <0x34>;
|
||||
defm V_SUB_U32 : VOP2_Real_e32e64_vi <0x35>;
|
||||
defm V_SUBREV_U32 : VOP2_Real_e32e64_vi <0x36>;
|
||||
}
|
||||
|
|
|
@ -410,7 +410,7 @@ def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
|
|||
|
||||
let SubtargetPredicate = Has16BitInsts in {
|
||||
|
||||
let F16_ZFILL = 1 in {
|
||||
let renamedInGFX9 = 1 in {
|
||||
def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUdiv_fixup>;
|
||||
}
|
||||
let SubtargetPredicate = isGFX9 in {
|
||||
|
@ -419,7 +419,7 @@ def V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9", VOP3_Profile<VOP_F1
|
|||
|
||||
let isCommutable = 1 in {
|
||||
|
||||
let F16_ZFILL = 1 in {
|
||||
let renamedInGFX9 = 1 in {
|
||||
def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
|
||||
def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
|
||||
def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
|
||||
|
@ -506,6 +506,9 @@ def V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32,
|
|||
|
||||
def V_CVT_PKNORM_I16_F16 : VOP3Inst <"v_cvt_pknorm_i16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
|
||||
def V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
|
||||
|
||||
def V_ADD_I32_gfx9 : VOP3Inst <"v_add_i32_gfx9", VOP3_Profile<VOP_I32_I32_I32>>;
|
||||
def V_SUB_I32_gfx9 : VOP3Inst <"v_sub_i32_gfx9", VOP3_Profile<VOP_I32_I32_I32>>;
|
||||
} // End SubtargetPredicate = isGFX9
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -703,6 +706,14 @@ multiclass VOP3OpSel_F16_Real_gfx9<bits<10> op, string AsmName> {
|
|||
}
|
||||
}
|
||||
|
||||
// Real definition for a VOP3 pseudo whose printed mnemonic differs from the
// pseudo's own name.
//   op      - 10-bit VOP3 opcode passed to the VOP3e_vi encoding class.
//   AsmName - mnemonic to use; AsmString = AsmName # <pseudo>.AsmOperands.
// NAME (the defm name) must name an existing VOP3_Pseudo. The generated def
// carries the suffix _vi although it is tagged with SIEncodingFamily.GFX9.
multiclass VOP3_Real_gfx9<bits<10> op, string AsmName> {
|
||||
def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX9>,
|
||||
VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
|
||||
VOP3_Pseudo ps = !cast<VOP3_Pseudo>(NAME);
|
||||
// Replace the pseudo's mnemonic with AsmName, keeping its operand string.
let AsmString = AsmName # ps.AsmOperands;
|
||||
}
|
||||
}
|
||||
|
||||
} // End AssemblerPredicates = [isGFX9], DecoderNamespace = "GFX9"
|
||||
|
||||
defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>;
|
||||
|
@ -769,6 +780,9 @@ defm V_MAD_I16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x205, "v_mad_i16">;
|
|||
defm V_FMA_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x206, "v_fma_f16">;
|
||||
defm V_DIV_FIXUP_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x207, "v_div_fixup_f16">;
|
||||
|
||||
defm V_ADD_I32_gfx9 : VOP3_Real_gfx9 <0x29c, "v_add_i32">;
|
||||
defm V_SUB_I32_gfx9 : VOP3_Real_gfx9 <0x29d, "v_sub_i32">;
|
||||
|
||||
defm V_INTERP_P1_F32_e64 : VOP3Interp_Real_vi <0x270>;
|
||||
defm V_INTERP_P2_F32_e64 : VOP3Interp_Real_vi <0x271>;
|
||||
defm V_INTERP_MOV_F32_e64 : VOP3Interp_Real_vi <0x272>;
|
||||
|
|
|
@ -52,8 +52,8 @@ define amdgpu_kernel void @s_test_add_self_v2i16(<2 x i16> addrspace(1)* %out, <
|
|||
; GCN-LABEL: {{^}}s_test_add_v2i16_kernarg:
|
||||
; GFX9: v_pk_add_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
||||
|
||||
; VI: v_add_i32
|
||||
; VI: v_add_i32_sdwa
|
||||
; VI: v_add_u32
|
||||
; VI: v_add_u32_sdwa
|
||||
define amdgpu_kernel void @s_test_add_v2i16_kernarg(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #1 {
|
||||
%add = add <2 x i16> %a, %b
|
||||
store <2 x i16> %add, <2 x i16> addrspace(1)* %out
|
||||
|
|
|
@ -13,7 +13,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
|||
|
||||
; GCN-LABEL: {{^}}work_item_info:
|
||||
; GCN-NOT: v0
|
||||
; GCN: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, v0, v{{[0-9]+}}
|
||||
; GCN: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, v0, v{{[0-9]+}}
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
|
||||
entry:
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
|
||||
; GCN-LABEL: {{^}}shader_cc:
|
||||
; GCN: v_add_i32_e32 v0, vcc, s8, v0
|
||||
; GCN: v_add_{{[iu]}}32_e32 v0, vcc, s8, v0
|
||||
define amdgpu_cs float @shader_cc(<4 x i32> inreg, <4 x i32> inreg, i32 inreg %w, float %v) {
|
||||
%vi = bitcast float %v to i32
|
||||
%x = add i32 %vi, %w
|
||||
|
|
|
@ -391,7 +391,7 @@ entry:
|
|||
; FUNC-LABEL: ptrtoint:
|
||||
; SI-NOT: ds_write
|
||||
; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
|
||||
; SI: v_add_i32_e32 [[ADD_OFFSET:v[0-9]+]], vcc, 5,
|
||||
; SI: v_add_{{[iu]}}32_e32 [[ADD_OFFSET:v[0-9]+]], vcc, 5,
|
||||
; SI: buffer_load_dword v{{[0-9]+}}, [[ADD_OFFSET:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ;
|
||||
define amdgpu_kernel void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
|
||||
%alloca = alloca [16 x i32]
|
||||
|
|
|
@ -9,8 +9,8 @@
|
|||
; VI-SDWA: v_lshlrev_b32_sdwa v[[ADDRBASE:[0-9]+]], v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
|
||||
; CI: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 6, v{{[0-9]+}}
|
||||
; CI: v_and_b32_e32 v[[ADDRLO:[0-9]+]], 0x3fc, v[[SHR]]
|
||||
; VI: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
|
||||
; VI-SDWA: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
|
||||
; VI: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
|
||||
; VI-SDWA: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
|
||||
; GCN: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]:
|
||||
define amdgpu_kernel void @bfe_combine8(i32 addrspace(1)* nocapture %arg, i32 %x) {
|
||||
%id = tail call i32 @llvm.amdgcn.workitem.id.x() #2
|
||||
|
@ -29,11 +29,11 @@ define amdgpu_kernel void @bfe_combine8(i32 addrspace(1)* nocapture %arg, i32 %x
|
|||
; VI-SDWA: v_mov_b32_e32 v[[SHIFT:[0-9]+]], 15
|
||||
; VI-SDWA: v_lshlrev_b32_sdwa v[[ADDRBASE1:[0-9]+]], v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; VI-SDWA: v_lshlrev_b64 v{{\[}}[[ADDRBASE:[0-9]+]]:{{[^\]+}}], 2, v{{\[}}[[ADDRBASE1]]:{{[^\]+}}]
|
||||
; VI-SDWA: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
|
||||
; VI-SDWA: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
|
||||
; CI: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 1, v{{[0-9]+}}
|
||||
; CI: v_and_b32_e32 v[[AND:[0-9]+]], 0x7fff8000, v[[SHR]]
|
||||
; CI: v_lshl_b64 v{{\[}}[[ADDRLO:[0-9]+]]:{{[^\]+}}], v{{\[}}[[AND]]:{{[^\]+}}], 2
|
||||
; VI: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
|
||||
; VI: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
|
||||
; GCN: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]:
|
||||
define amdgpu_kernel void @bfe_combine16(i32 addrspace(1)* nocapture %arg, i32 %x) {
|
||||
%id = tail call i32 @llvm.amdgcn.workitem.id.x() #2
|
||||
|
|
|
@ -22,7 +22,7 @@ define amdgpu_kernel void @v_ubfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(
|
|||
; GCN-LABEL: {{^}}v_ubfe_sub_multi_use_shl_i32:
|
||||
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
|
||||
; GCN: v_sub_i32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]
|
||||
; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]
|
||||
|
||||
; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
|
||||
; SI-NEXT: v_lshr_b32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]
|
||||
|
@ -100,7 +100,7 @@ define amdgpu_kernel void @v_sbfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(
|
|||
; GCN-LABEL: {{^}}v_sbfe_sub_multi_use_shl_i32:
|
||||
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
|
||||
; GCN: v_sub_i32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]
|
||||
; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]
|
||||
|
||||
; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
|
||||
; SI-NEXT: v_ashr_i32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]
|
||||
|
|
|
@ -38,11 +38,11 @@ entry:
|
|||
; GCN-DAG: s_add_u32 s32, s32, 0xb00{{$}}
|
||||
|
||||
; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
|
||||
; GCN: v_add_i32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]]
|
||||
; GCN: v_add_{{[iu]}}32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]]
|
||||
; GCN: buffer_store_dword [[ADD0]], off, s[0:3], s5 offset:4{{$}}
|
||||
|
||||
; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:20{{$}}
|
||||
; GCN: v_add_i32_e32 [[ADD1:v[0-9]+]], vcc, 2, [[LOAD1]]
|
||||
; GCN: v_add_{{[iu]}}32_e32 [[ADD1:v[0-9]+]], vcc, 2, [[LOAD1]]
|
||||
|
||||
; GCN: s_swappc_b64
|
||||
|
||||
|
|
|
@ -112,7 +112,7 @@ define amdgpu_kernel void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4
|
|||
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 32, [[FFBH]], vcc
|
||||
|
||||
; SI: v_subrev_i32_e32 [[RESULT:v[0-9]+]], vcc, 24, [[SELECT]]
|
||||
; VI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, -16, [[SELECT]]
|
||||
; VI: v_add_u32_e32 [[RESULT:v[0-9]+]], vcc, -16, [[SELECT]]
|
||||
; GCN: buffer_store_byte [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
|
||||
|
@ -151,7 +151,7 @@ define amdgpu_kernel void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64
|
|||
; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
|
||||
; GCN-DAG: v_cmp_eq_u32_e32 vcc, 0, v[[HI]]
|
||||
; GCN-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]]
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
|
||||
; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
|
||||
; GCN-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
|
||||
; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[ADD]], vcc
|
||||
; GCN-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[LO]], v[[HI]]
|
||||
|
|
|
@ -126,7 +126,7 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias
|
|||
; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
|
||||
; GCN-DAG: v_cmp_eq_u32_e32 vcc, 0, v[[HI]]
|
||||
; GCN-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]]
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
|
||||
; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
|
||||
; GCN-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
|
||||
; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[FFBH_LO]]
|
||||
; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI:[0-9]+]]{{\]}}
|
||||
|
|
|
@ -188,7 +188,7 @@ define amdgpu_kernel void @s_ctpop_i65(i32 addrspace(1)* noalias %out, i65 %val)
|
|||
; GCN-DAG: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT2:v[0-9]+]], v[[VAL0]], 0
|
||||
; GCN-DAG: v_bcnt_u32_b32{{(_e32)*(_e64)*}} [[MIDRESULT3:v[0-9]+]], v{{[0-9]+}}, [[MIDRESULT2]]
|
||||
|
||||
; GCN: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, [[MIDRESULT1]], [[MIDRESULT2]]
|
||||
; GCN: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, [[MIDRESULT1]], [[MIDRESULT2]]
|
||||
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
|
|
|
@ -169,7 +169,7 @@ define amdgpu_kernel void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias
|
|||
|
||||
; GCN-LABEL: {{^}}i8_zext_inreg_i32_to_f32:
|
||||
; GCN: {{buffer|flat}}_load_dword [[LOADREG:v[0-9]+]],
|
||||
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 2, [[LOADREG]]
|
||||
; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 2, [[LOADREG]]
|
||||
; GCN-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]]
|
||||
; GCN: buffer_store_dword [[CONV]],
|
||||
define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
|
||||
|
|
|
@ -4,12 +4,12 @@
|
|||
; GCN-LABEL: ds_read32_combine_stride_400:
|
||||
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
|
||||
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:100
|
||||
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100
|
||||
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100
|
||||
|
@ -46,12 +46,12 @@ bb:
|
|||
; GCN-LABEL: ds_read32_combine_stride_400_back:
|
||||
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
|
||||
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:100
|
||||
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100
|
||||
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100
|
||||
|
@ -124,12 +124,12 @@ bb:
|
|||
; GCN-LABEL: ds_read32_combine_stride_8192_shifted:
|
||||
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
|
||||
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
|
||||
; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:32
|
||||
; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:32
|
||||
; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:32
|
||||
|
@ -160,8 +160,8 @@ bb:
|
|||
; GCN-LABEL: ds_read64_combine_stride_400:
|
||||
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
|
||||
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:50
|
||||
; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:100 offset1:150
|
||||
; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:200 offset1:250
|
||||
|
@ -198,12 +198,12 @@ bb:
|
|||
; GCN-LABEL: ds_read64_combine_stride_8192_shifted:
|
||||
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
|
||||
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
|
||||
; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:16
|
||||
; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:16
|
||||
; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:16
|
||||
|
@ -234,12 +234,12 @@ bb:
|
|||
; GCN-LABEL: ds_write32_combine_stride_400:
|
||||
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
|
||||
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GCN-DAG: ds_write2_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
|
||||
; GCN-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
|
||||
; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
|
||||
|
@ -267,12 +267,12 @@ bb:
|
|||
; GCN-LABEL: ds_write32_combine_stride_400_back:
|
||||
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
|
||||
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GCN-DAG: ds_write2_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
|
||||
; GCN-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
|
||||
; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
|
||||
|
@ -327,12 +327,12 @@ bb:
|
|||
; GCN-LABEL: ds_write32_combine_stride_8192_shifted:
|
||||
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
|
||||
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4004, [[BASE]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8004, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4004, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8004, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4004, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8004, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4004, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8004, [[BASE]]
|
||||
; GCN-DAG: ds_write2st64_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
|
||||
; GCN-DAG: ds_write2st64_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
|
||||
; GCN-DAG: ds_write2st64_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
|
||||
|
@ -356,8 +356,8 @@ bb:
|
|||
; GCN-LABEL: ds_write64_combine_stride_400:
|
||||
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
|
||||
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
|
||||
; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:50
|
||||
; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:100 offset1:150
|
||||
; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:200 offset1:250
|
||||
|
@ -385,12 +385,12 @@ bb:
|
|||
; GCN-LABEL: ds_write64_combine_stride_8192_shifted:
|
||||
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
|
||||
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
|
||||
; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
|
||||
; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
|
||||
; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
|
||||
; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
|
||||
; GCN-DAG: ds_write2st64_b64 [[B1]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16
|
||||
; GCN-DAG: ds_write2st64_b64 [[B2]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16
|
||||
; GCN-DAG: ds_write2st64_b64 [[B3]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16
|
||||
|
|
|
@ -53,7 +53,7 @@ define amdgpu_kernel void @test_local(i32 addrspace(1)*) {
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_global
|
||||
; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 0x888, v{{[0-9]+}}
|
||||
; GCN: v_add_u32_e32 v{{[0-9]+}}, vcc, 0x888, v{{[0-9]+}}
|
||||
; GCN: flat_store_dword
|
||||
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
|
||||
; GCN-NEXT: s_barrier
|
||||
|
|
|
@ -24,7 +24,7 @@ define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 {
|
|||
|
||||
; GCN-LABEL: {{^}}void_func_i1_signext:
|
||||
; GCN: s_waitcnt
|
||||
; GCN-NEXT: v_add_i32_e32 v0, vcc, 12, v0
|
||||
; GCN-NEXT: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0
|
||||
; GCN-NOT: v0
|
||||
; GCN: buffer_store_dword v0, off
|
||||
define void @void_func_i1_signext(i1 signext %arg0) #0 {
|
||||
|
@ -60,7 +60,7 @@ define void @void_func_i8(i8 %arg0) #0 {
|
|||
|
||||
; GCN-LABEL: {{^}}void_func_i8_zeroext:
|
||||
; GCN-NOT: and_b32
|
||||
; GCN: v_add_i32_e32 v0, vcc, 12, v0
|
||||
; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0
|
||||
define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {
|
||||
%ext = zext i8 %arg0 to i32
|
||||
%add = add i32 %ext, 12
|
||||
|
@ -70,7 +70,7 @@ define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {
|
|||
|
||||
; GCN-LABEL: {{^}}void_func_i8_signext:
|
||||
; GCN-NOT: v_bfe_i32
|
||||
; GCN: v_add_i32_e32 v0, vcc, 12, v0
|
||||
; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0
|
||||
define void @void_func_i8_signext(i8 signext %arg0) #0 {
|
||||
%ext = sext i8 %arg0 to i32
|
||||
%add = add i32 %ext, 12
|
||||
|
@ -87,7 +87,7 @@ define void @void_func_i16(i16 %arg0) #0 {
|
|||
|
||||
; GCN-LABEL: {{^}}void_func_i16_zeroext:
|
||||
; GCN-NOT: v0
|
||||
; GCN: v_add_i32_e32 v0, vcc, 12, v0
|
||||
; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0
|
||||
define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 {
|
||||
%ext = zext i16 %arg0 to i32
|
||||
%add = add i32 %ext, 12
|
||||
|
@ -97,7 +97,7 @@ define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 {
|
|||
|
||||
; GCN-LABEL: {{^}}void_func_i16_signext:
|
||||
; GCN-NOT: v0
|
||||
; GCN: v_add_i32_e32 v0, vcc, 12, v0
|
||||
; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0
|
||||
define void @void_func_i16_signext(i16 signext %arg0) #0 {
|
||||
%ext = sext i16 %arg0 to i32
|
||||
%add = add i32 %ext, 12
|
||||
|
|
|
@ -396,100 +396,100 @@ define void @void_func_sret_struct_i8_i32({ i8, i32 }* sret %arg0) #0 {
|
|||
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_4:v[0-9]+]], vcc, 4, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_4:v[0-9]+]], vcc, 4, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_4]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_8:v[0-9]+]], vcc, 8, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_8:v[0-9]+]], vcc, 8, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_8]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_12:v[0-9]+]], vcc, 12, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_12:v[0-9]+]], vcc, 12, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_12]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_16:v[0-9]+]], vcc, 16, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_16:v[0-9]+]], vcc, 16, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_16]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_20:v[0-9]+]], vcc, 20, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_20:v[0-9]+]], vcc, 20, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_20]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_24:v[0-9]+]], vcc, 24, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_24:v[0-9]+]], vcc, 24, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_24]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_28:v[0-9]+]], vcc, 28, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_28:v[0-9]+]], vcc, 28, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_28]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_32:v[0-9]+]], vcc, 32, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_32:v[0-9]+]], vcc, 32, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_32]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_36:v[0-9]+]], vcc, 36, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_36:v[0-9]+]], vcc, 36, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_36]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_40:v[0-9]+]], vcc, 40, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_40:v[0-9]+]], vcc, 40, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_40]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_44:v[0-9]+]], vcc, 44, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_44:v[0-9]+]], vcc, 44, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_44]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_48:v[0-9]+]], vcc, 48, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_48:v[0-9]+]], vcc, 48, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_48]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_52:v[0-9]+]], vcc, 52, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_52:v[0-9]+]], vcc, 52, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_52]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_56:v[0-9]+]], vcc, 56, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_56:v[0-9]+]], vcc, 56, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_56]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_60:v[0-9]+]], vcc, 60, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_60:v[0-9]+]], vcc, 60, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_60]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_64:v[0-9]+]], vcc, 64, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_64:v[0-9]+]], vcc, 64, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_64]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_68:v[0-9]+]], vcc, 0x44, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_68:v[0-9]+]], vcc, 0x44, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_68]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_72:v[0-9]+]], vcc, 0x48, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_72:v[0-9]+]], vcc, 0x48, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_72]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_76:v[0-9]+]], vcc, 0x4c, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_76:v[0-9]+]], vcc, 0x4c, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_76]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_80:v[0-9]+]], vcc, 0x50, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_80:v[0-9]+]], vcc, 0x50, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_80]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_84:v[0-9]+]], vcc, 0x54, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_84:v[0-9]+]], vcc, 0x54, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_84]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_88:v[0-9]+]], vcc, 0x58, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_88:v[0-9]+]], vcc, 0x58, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_88]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_92:v[0-9]+]], vcc, 0x5c, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_92:v[0-9]+]], vcc, 0x5c, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_92]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_96:v[0-9]+]], vcc, 0x60, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_96:v[0-9]+]], vcc, 0x60, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_96]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_100:v[0-9]+]], vcc, 0x64, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_100:v[0-9]+]], vcc, 0x64, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_100]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_104:v[0-9]+]], vcc, 0x68, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_104:v[0-9]+]], vcc, 0x68, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_104]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_108:v[0-9]+]], vcc, 0x6c, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_108:v[0-9]+]], vcc, 0x6c, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_108]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_112:v[0-9]+]], vcc, 0x70, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_112:v[0-9]+]], vcc, 0x70, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_112]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_116:v[0-9]+]], vcc, 0x74, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_116:v[0-9]+]], vcc, 0x74, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_116]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_120:v[0-9]+]], vcc, 0x78, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_120:v[0-9]+]], vcc, 0x78, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_120]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_124:v[0-9]+]], vcc, 0x7c, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_124:v[0-9]+]], vcc, 0x7c, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_124]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_128]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN: buffer_load_dword v34
|
||||
|
@ -510,100 +510,100 @@ define <33 x i32> @v33i32_func_void() #0 {
|
|||
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_4:v[0-9]+]], vcc, 4, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_4:v[0-9]+]], vcc, 4, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_4]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_8:v[0-9]+]], vcc, 8, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_8:v[0-9]+]], vcc, 8, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_8]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_12:v[0-9]+]], vcc, 12, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_12:v[0-9]+]], vcc, 12, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_12]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_16:v[0-9]+]], vcc, 16, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_16:v[0-9]+]], vcc, 16, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_16]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_20:v[0-9]+]], vcc, 20, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_20:v[0-9]+]], vcc, 20, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_20]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_24:v[0-9]+]], vcc, 24, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_24:v[0-9]+]], vcc, 24, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_24]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_28:v[0-9]+]], vcc, 28, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_28:v[0-9]+]], vcc, 28, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_28]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_32:v[0-9]+]], vcc, 32, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_32:v[0-9]+]], vcc, 32, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_32]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_36:v[0-9]+]], vcc, 36, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_36:v[0-9]+]], vcc, 36, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_36]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_40:v[0-9]+]], vcc, 40, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_40:v[0-9]+]], vcc, 40, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_40]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_44:v[0-9]+]], vcc, 44, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_44:v[0-9]+]], vcc, 44, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_44]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_48:v[0-9]+]], vcc, 48, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_48:v[0-9]+]], vcc, 48, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_48]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_52:v[0-9]+]], vcc, 52, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_52:v[0-9]+]], vcc, 52, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_52]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_56:v[0-9]+]], vcc, 56, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_56:v[0-9]+]], vcc, 56, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_56]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_60:v[0-9]+]], vcc, 60, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_60:v[0-9]+]], vcc, 60, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_60]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_64:v[0-9]+]], vcc, 64, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_64:v[0-9]+]], vcc, 64, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_64]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_68:v[0-9]+]], vcc, 0x44, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_68:v[0-9]+]], vcc, 0x44, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_68]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_72:v[0-9]+]], vcc, 0x48, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_72:v[0-9]+]], vcc, 0x48, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_72]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_76:v[0-9]+]], vcc, 0x4c, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_76:v[0-9]+]], vcc, 0x4c, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_76]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_80:v[0-9]+]], vcc, 0x50, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_80:v[0-9]+]], vcc, 0x50, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_80]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_84:v[0-9]+]], vcc, 0x54, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_84:v[0-9]+]], vcc, 0x54, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_84]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_88:v[0-9]+]], vcc, 0x58, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_88:v[0-9]+]], vcc, 0x58, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_88]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_92:v[0-9]+]], vcc, 0x5c, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_92:v[0-9]+]], vcc, 0x5c, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_92]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_96:v[0-9]+]], vcc, 0x60, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_96:v[0-9]+]], vcc, 0x60, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_96]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_100:v[0-9]+]], vcc, 0x64, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_100:v[0-9]+]], vcc, 0x64, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_100]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_104:v[0-9]+]], vcc, 0x68, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_104:v[0-9]+]], vcc, 0x68, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_104]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_108:v[0-9]+]], vcc, 0x6c, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_108:v[0-9]+]], vcc, 0x6c, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_108]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_112:v[0-9]+]], vcc, 0x70, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_112:v[0-9]+]], vcc, 0x70, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_112]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_116:v[0-9]+]], vcc, 0x74, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_116:v[0-9]+]], vcc, 0x74, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_116]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_120:v[0-9]+]], vcc, 0x78, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_120:v[0-9]+]], vcc, 0x78, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_120]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_124:v[0-9]+]], vcc, 0x7c, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_124:v[0-9]+]], vcc, 0x7c, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_124]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_128]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN: buffer_load_dword v34
|
||||
|
@ -623,11 +623,11 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
|
|||
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_128]], s[0:3], s4 offen{{$}}
|
||||
|
||||
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD_256:v[0-9]+]], vcc, 0xfc, v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_256:v[0-9]+]], vcc, 0xfc, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_256]], s[0:3], s4 offen{{$}}
|
||||
|
||||
; GCN: buffer_load_dword v33
|
||||
|
|
|
@ -103,7 +103,7 @@ main_body:
|
|||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}buffer_load_negative_offset:
|
||||
;CHECK: v_add_i32_e32 [[VOFS:v[0-9]+]], vcc, -16, v0
|
||||
;CHECK: v_add_{{[iu]}}32_e32 [[VOFS:v[0-9]+]], vcc, -16, v0
|
||||
;CHECK: buffer_load_dwordx4 v[0:3], [[VOFS]], s[0:3], 0 offen
|
||||
define amdgpu_ps <4 x float> @buffer_load_negative_offset(<4 x i32> inreg, i32 %ofs) {
|
||||
main_body:
|
||||
|
|
|
@ -51,7 +51,7 @@ main_body:
|
|||
; GCN: s_bfm_b64 exec, s1, 0
|
||||
; GCN: s_cmp_eq_u32 s1, 64
|
||||
; GCN: s_cmov_b64 exec, -1
|
||||
; GCN: v_add_i32_e32 v0, vcc, s0, v0
|
||||
; GCN: v_add_co_u32_e32 v0, vcc, s0, v0
|
||||
define amdgpu_ps float @reuse_input(i32 inreg %count, i32 %a) {
|
||||
main_body:
|
||||
call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 19)
|
||||
|
@ -65,7 +65,7 @@ main_body:
|
|||
; GCN: s_bfm_b64 exec, s1, 0
|
||||
; GCN: s_cmp_eq_u32 s1, 64
|
||||
; GCN: s_cmov_b64 exec, -1
|
||||
; GCN: v_add_i32_e32 v0, vcc, s0, v0
|
||||
; GCN: v_add_co_u32_e32 v0, vcc, s0, v0
|
||||
define amdgpu_ps float @reuse_input2(i32 inreg %count, i32 %a) {
|
||||
main_body:
|
||||
%s = add i32 %a, %count
|
||||
|
|
|
@ -396,7 +396,7 @@ define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrs
|
|||
; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
|
||||
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
|
||||
; GCN: v_lshrrev_b32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
|
||||
; GCN: v_add_i32_e32 [[TMP1:v[0-9]+]], vcc, [[TMP0]], [[BFE]]
|
||||
; GCN: v_add_{{[iu]}}32_e32 [[TMP1:v[0-9]+]], vcc, [[TMP0]], [[BFE]]
|
||||
; GCN: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
|
||||
; GCN: buffer_store_dword [[TMP2]]
|
||||
define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
||||
|
|
|
@ -65,7 +65,7 @@ define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrsp
|
|||
|
||||
; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8:
|
||||
; GCN: buffer_load_dword
|
||||
; GCN: v_add_i32
|
||||
; GCN: v_add_{{[iu]}}32
|
||||
; GCN-NEXT: v_and_b32_e32
|
||||
; FIXME: Should be using s_add_i32
|
||||
; GCN-NOT: {{[^@]}}bfe
|
||||
|
@ -81,7 +81,7 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 ad
|
|||
|
||||
; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i16:
|
||||
; GCN: buffer_load_dword
|
||||
; GCN: v_add_i32
|
||||
; GCN: v_add_{{[iu]}}32
|
||||
; GCN-NEXT: v_and_b32_e32
|
||||
; GCN-NOT: {{[^@]}}bfe
|
||||
; GCN: s_endpgm
|
||||
|
@ -96,7 +96,7 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 a
|
|||
|
||||
; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_1:
|
||||
; GCN: buffer_load_dword
|
||||
; GCN: v_add_i32
|
||||
; GCN: v_add_{{[iu]}}32
|
||||
; GCN: bfe
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
||||
|
@ -110,7 +110,7 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %ou
|
|||
|
||||
; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_3:
|
||||
; GCN: buffer_load_dword
|
||||
; GCN: v_add_i32
|
||||
; GCN: v_add_{{[iu]}}32
|
||||
; GCN-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0xf8
|
||||
; GCN-NEXT: bfe
|
||||
; GCN: s_endpgm
|
||||
|
@ -125,7 +125,7 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %ou
|
|||
|
||||
; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_7:
|
||||
; GCN: buffer_load_dword
|
||||
; GCN: v_add_i32
|
||||
; GCN: v_add_{{[iu]}}32
|
||||
; GCN-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0x80
|
||||
; GCN-NEXT: bfe
|
||||
; GCN: s_endpgm
|
||||
|
@ -140,7 +140,7 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %ou
|
|||
|
||||
; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i16_offset_8:
|
||||
; GCN: buffer_load_dword
|
||||
; GCN: v_add_i32
|
||||
; GCN: v_add_{{[iu]}}32
|
||||
; GCN-NEXT: bfe
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
||||
|
|
|
@ -87,7 +87,7 @@ define amdgpu_kernel void @v_pack_v2f16(i32 addrspace(1)* %in0, i32 addrspace(1)
|
|||
; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VAL0]]
|
||||
; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[ELT0]]
|
||||
|
||||
; GFX9: v_add_i32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]]
|
||||
; GFX9: v_add_{{[_coiu]*}}32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]]
|
||||
define amdgpu_kernel void @v_pack_v2f16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
|
|
|
@ -81,7 +81,7 @@ define amdgpu_kernel void @v_pack_v2i16(i32 addrspace(1)* %in0, i32 addrspace(1)
|
|||
; GFX9: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]]
|
||||
; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[MASKED]]
|
||||
|
||||
; GFX9: v_add_i32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]]
|
||||
; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]]
|
||||
define amdgpu_kernel void @v_pack_v2i16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
|
|
|
@ -20,7 +20,7 @@ entry:
|
|||
; BOTH-LABEL: {{^}}v_rotl_i64:
|
||||
; SI-DAG: v_lshl_b64
|
||||
; VI-DAG: v_lshlrev_b64
|
||||
; BOTH-DAG: v_sub_i32
|
||||
; BOTH-DAG: v_sub_{{[iu]}}32
|
||||
; SI: v_lshr_b64
|
||||
; VI: v_lshrrev_b64
|
||||
; BOTH: v_or_b32
|
||||
|
|
|
@ -17,7 +17,7 @@ entry:
|
|||
}
|
||||
|
||||
; BOTH-LABEL: {{^}}v_rotr_i64:
|
||||
; BOTH-DAG: v_sub_i32
|
||||
; BOTH-DAG: v_sub_{{[iu]}}32
|
||||
; SI-DAG: v_lshr_b64
|
||||
; SI-DAG: v_lshl_b64
|
||||
; VI-DAG: v_lshrrev_b64
|
||||
|
|
|
@ -49,7 +49,7 @@ define amdgpu_kernel void @s_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)*
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_saddo_i64:
|
||||
; SI: v_add_i32
|
||||
; SI: v_add_{{[iu]}}32
|
||||
; SI: v_addc_u32
|
||||
define amdgpu_kernel void @v_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
|
||||
%a = load i64, i64 addrspace(1)* %aptr, align 4
|
||||
|
|
|
@ -50,7 +50,7 @@ done:
|
|||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
|
||||
; This constant isn't folded, because it has multiple uses.
|
||||
; GCN-DAG: v_mov_b32_e32 [[K8000:v[0-9]+]], 0x8004
|
||||
; GCN-DAG: v_add_i32_e32 [[OFFSET:v[0-9]+]], vcc, [[K8000]]
|
||||
; GCN-DAG: v_add_{{[iu]}}32_e32 [[OFFSET:v[0-9]+]], vcc, [[K8000]]
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
|
||||
|
||||
define amdgpu_kernel void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) {
|
||||
|
@ -87,7 +87,7 @@ done:
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}neg_vaddr_offset_inbounds:
|
||||
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 16, v{{[0-9]+}}
|
||||
; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 16, v{{[0-9]+}}
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen{{$}}
|
||||
define amdgpu_kernel void @neg_vaddr_offset_inbounds(i32 %offset) {
|
||||
entry:
|
||||
|
@ -99,7 +99,7 @@ entry:
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}neg_vaddr_offset:
|
||||
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 16, v{{[0-9]+}}
|
||||
; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 16, v{{[0-9]+}}
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen{{$}}
|
||||
define amdgpu_kernel void @neg_vaddr_offset(i32 %offset) {
|
||||
entry:
|
||||
|
|
|
@ -37,10 +37,10 @@ define amdgpu_kernel void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)*
|
|||
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; SI-DAG: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x98a1930b
|
||||
; SI: v_mul_hi_i32 [[TMP:v[0-9]+]], [[VAL]], [[MAGIC]]
|
||||
; SI: v_add_i32
|
||||
; SI: v_add_{{[iu]}}32
|
||||
; SI: v_lshrrev_b32
|
||||
; SI: v_ashrrev_i32
|
||||
; SI: v_add_i32
|
||||
; SI: v_add_{{[iu]}}32
|
||||
; SI: buffer_store_dword
|
||||
; SI: s_endpgm
|
||||
define amdgpu_kernel void @slow_sdiv_i32_3435(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
|
||||
|
|
|
@ -4,10 +4,10 @@
|
|||
|
||||
; GCN-LABEL: {{^}}add_shr_i32:
|
||||
; NOSDWA: v_lshrrev_b32_e32 v[[DST:[0-9]+]], 16, v{{[0-9]+}}
|
||||
; NOSDWA: v_add_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v[[DST]]
|
||||
; NOSDWA-NOT: v_add_i32_sdwa
|
||||
; NOSDWA: v_add_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v[[DST]]
|
||||
; NOSDWA-NOT: v_add_{{[_cou]*}}32_sdwa
|
||||
|
||||
; SDWA: v_add_i32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; SDWA: v_add_{{[_cou]*}}32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
|
||||
define amdgpu_kernel void @add_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
|
||||
%a = load i32, i32 addrspace(1)* %in, align 4
|
||||
|
@ -19,10 +19,10 @@ define amdgpu_kernel void @add_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)*
|
|||
|
||||
; GCN-LABEL: {{^}}sub_shr_i32:
|
||||
; NOSDWA: v_lshrrev_b32_e32 v[[DST:[0-9]+]], 16, v{{[0-9]+}}
|
||||
; NOSDWA: v_subrev_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v[[DST]]
|
||||
; NOSDWA-NOT: v_subrev_i32_sdwa
|
||||
; NOSDWA: v_subrev_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v[[DST]]
|
||||
; NOSDWA-NOT: v_subrev_{{[_cou]*}}32_sdwa
|
||||
|
||||
; SDWA: v_subrev_i32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
; SDWA: v_subrev_{{[_cou]*}}32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
|
||||
define amdgpu_kernel void @sub_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
|
||||
%a = load i32, i32 addrspace(1)* %in, align 4
|
||||
|
@ -426,9 +426,9 @@ entry:
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}add_bb_v2i16:
|
||||
; NOSDWA-NOT: v_add_i32_sdwa
|
||||
; NOSDWA-NOT: v_add_{{[_cou]*}}32_sdwa
|
||||
|
||||
; VI: v_add_i32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
; VI: v_add_u32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
|
||||
|
||||
; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
; used in an REG_SEQUENCE that also needs to be handled.
|
||||
|
||||
; SI-LABEL: {{^}}test_dup_operands:
|
||||
; SI: v_add_i32_e32
|
||||
; SI: v_add_{{[iu]}}32_e32
|
||||
define amdgpu_kernel void @test_dup_operands(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) {
|
||||
%a = load <2 x i32>, <2 x i32> addrspace(1)* %in
|
||||
%lo = extractelement <2 x i32> %a, i32 0
|
||||
|
|
|
@ -321,7 +321,7 @@ ENDIF69: ; preds = %LOOP68
|
|||
; CHECK: s_cmp_eq_u32
|
||||
; CHECK: s_cbranch_scc0 [[END:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; CHECK: v_add_i32_e32 v[[ADD:[0-9]+]], vcc, 1, v{{[0-9]+}}
|
||||
; CHECK: v_add_{{[iu]}}32_e32 v[[ADD:[0-9]+]], vcc, 1, v{{[0-9]+}}
|
||||
|
||||
; [[END]]:
|
||||
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}}
|
||||
|
|
|
@ -5,9 +5,9 @@
|
|||
|
||||
; CHECK-LABEL: {{^}}add_const_offset:
|
||||
; CHECK: v_lshlrev_b32_e32 v[[SHL:[0-9]+]], 4, v0
|
||||
; CHECK: v_add_i32_e32 v[[ADD:[0-9]+]], vcc, 0xc80, v[[SHL]]
|
||||
; CHECK: v_add_u32_e32 v[[ADD:[0-9]+]], vcc, 0xc80, v[[SHL]]
|
||||
; CHECK-NOT: v_lshl
|
||||
; CHECK: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADD]]
|
||||
; CHECK: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADD]]
|
||||
; CHECK: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]:
|
||||
define amdgpu_kernel void @add_const_offset(i32 addrspace(1)* nocapture %arg) {
|
||||
bb:
|
||||
|
@ -24,7 +24,7 @@ bb:
|
|||
; CHECK: v_lshlrev_b32_e32 v[[SHL:[0-9]+]], 4, v0
|
||||
; CHECK: v_or_b32_e32 v[[OR:[0-9]+]], 0x1000, v[[SHL]]
|
||||
; CHECK-NOT: v_lshl
|
||||
; CHECK: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[OR]]
|
||||
; CHECK: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[OR]]
|
||||
; CHECK: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]:
|
||||
define amdgpu_kernel void @or_const_offset(i32 addrspace(1)* nocapture %arg) {
|
||||
bb:
|
||||
|
|
|
@ -35,7 +35,7 @@ define amdgpu_kernel void @load_shl_base_lds_0(float addrspace(1)* %out, i32 add
|
|||
; GCN-LABEL: {{^}}load_shl_base_lds_1:
|
||||
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
|
||||
; GCN: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8
|
||||
; GCN: v_add_i32_e32 [[ADDUSE:v[0-9]+]], vcc, 8, v{{[0-9]+}}
|
||||
; GCN: v_add_{{[iu]}}32_e32 [[ADDUSE:v[0-9]+]], vcc, 8, v{{[0-9]+}}
|
||||
; GCN-DAG: buffer_store_dword [[RESULT]]
|
||||
; GCN-DAG: buffer_store_dword [[ADDUSE]]
|
||||
; GCN: s_endpgm
|
||||
|
@ -301,7 +301,7 @@ define void @shl_add_ptr_combine_2use_lds(i32 %idx) #0 {
|
|||
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, v0
|
||||
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, v0
|
||||
; GCN-DAG: ds_write_b32 [[SCALE0]], v{{[0-9]+}} offset:65528
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD1:v[0-9]+]], vcc, 0x1fff0, [[SCALE1]]
|
||||
; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD1:v[0-9]+]], vcc, 0x1fff0, [[SCALE1]]
|
||||
; GCN: ds_write_b32 [[ADD1]], v{{[0-9]+$}}
|
||||
define void @shl_add_ptr_combine_2use_max_lds_offset(i32 %idx) #0 {
|
||||
%idx.add = add nuw i32 %idx, 8191
|
||||
|
@ -315,7 +315,7 @@ define void @shl_add_ptr_combine_2use_max_lds_offset(i32 %idx) #0 {
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_both_max_lds_offset:
|
||||
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 0x1000, v0
|
||||
; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 0x1000, v0
|
||||
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 4, [[ADD]]
|
||||
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 5, [[ADD]]
|
||||
; GCN-DAG: ds_write_b32 [[SCALE0]], v{{[0-9]+$}}
|
||||
|
@ -353,7 +353,7 @@ define void @shl_add_ptr_combine_2use_private(i16 zeroext %idx.arg) #0 {
|
|||
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, v0
|
||||
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, v0
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[SCALE0]], s[0:3], s4 offen offset:4088
|
||||
; GCN-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 0x1ff0, [[SCALE1]]
|
||||
; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 0x1ff0, [[SCALE1]]
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[0:3], s4 offen{{$}}
|
||||
define void @shl_add_ptr_combine_2use_max_private_offset(i16 zeroext %idx.arg) #0 {
|
||||
%idx = zext i16 %idx.arg to i32
|
||||
|
@ -367,7 +367,7 @@ define void @shl_add_ptr_combine_2use_max_private_offset(i16 zeroext %idx.arg) #
|
|||
ret void
|
||||
}
|
||||
; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_both_max_private_offset:
|
||||
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 0x100, v0
|
||||
; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 0x100, v0
|
||||
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 4, [[ADD]]
|
||||
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 5, [[ADD]]
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[SCALE0]], s[0:3], s4 offen{{$}}
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
; GCN-LABEL: {{^}}v_test_i32_x_sub_64:
|
||||
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
|
||||
; GCN: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
|
||||
; GCN: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
|
||||
define amdgpu_kernel void @v_test_i32_x_sub_64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
|
@ -21,8 +21,8 @@ define amdgpu_kernel void @v_test_i32_x_sub_64(i32 addrspace(1)* %out, i32 addrs
|
|||
; GCN-LABEL: {{^}}v_test_i32_x_sub_64_multi_use:
|
||||
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_load_dword [[Y:v[0-9]+]]
|
||||
; GCN-DAG: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
|
||||
; GCN-DAG: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[Y]]
|
||||
; GCN-DAG: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
|
||||
; GCN-DAG: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[Y]]
|
||||
define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
|
@ -39,7 +39,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(i32 addrspace(1)* %out,
|
|||
|
||||
; GCN-LABEL: {{^}}v_test_i32_64_sub_x:
|
||||
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
|
||||
; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
|
||||
; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
|
||||
define amdgpu_kernel void @v_test_i32_64_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
|
@ -53,7 +53,7 @@ define amdgpu_kernel void @v_test_i32_64_sub_x(i32 addrspace(1)* %out, i32 addrs
|
|||
|
||||
; GCN-LABEL: {{^}}v_test_i32_x_sub_65:
|
||||
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
|
||||
; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 0xffffffbf, [[X]]
|
||||
; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0xffffffbf, [[X]]
|
||||
define amdgpu_kernel void @v_test_i32_x_sub_65(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
|
@ -67,7 +67,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_65(i32 addrspace(1)* %out, i32 addrs
|
|||
|
||||
; GCN-LABEL: {{^}}v_test_i32_65_sub_x:
|
||||
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
|
||||
; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, 0x41, [[X]]
|
||||
; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0x41, [[X]]
|
||||
define amdgpu_kernel void @v_test_i32_65_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
|
@ -81,7 +81,7 @@ define amdgpu_kernel void @v_test_i32_65_sub_x(i32 addrspace(1)* %out, i32 addrs
|
|||
|
||||
; GCN-LABEL: {{^}}v_test_i32_x_sub_neg16:
|
||||
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
|
||||
; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 16, [[X]]
|
||||
; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 16, [[X]]
|
||||
define amdgpu_kernel void @v_test_i32_x_sub_neg16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
|
@ -95,7 +95,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_neg16(i32 addrspace(1)* %out, i32 ad
|
|||
|
||||
; GCN-LABEL: {{^}}v_test_i32_neg16_sub_x:
|
||||
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
|
||||
; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, -16, [[X]]
|
||||
; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, -16, [[X]]
|
||||
define amdgpu_kernel void @v_test_i32_neg16_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
|
@ -109,7 +109,7 @@ define amdgpu_kernel void @v_test_i32_neg16_sub_x(i32 addrspace(1)* %out, i32 ad
|
|||
|
||||
; GCN-LABEL: {{^}}v_test_i32_x_sub_neg17:
|
||||
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
|
||||
; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 17, [[X]]
|
||||
; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 17, [[X]]
|
||||
define amdgpu_kernel void @v_test_i32_x_sub_neg17(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
|
@ -123,7 +123,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_neg17(i32 addrspace(1)* %out, i32 ad
|
|||
|
||||
; GCN-LABEL: {{^}}v_test_i32_neg17_sub_x:
|
||||
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
|
||||
; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, 0xffffffef, [[X]]
|
||||
; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0xffffffef, [[X]]
|
||||
define amdgpu_kernel void @v_test_i32_neg17_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
; GCN-LABEL: {{^}}i32_fastcc_i32_i32:
|
||||
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_add_i32_e32 v0, vcc, v1, v0
|
||||
; GCN-NEXT: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v0
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 {
|
||||
%add0 = add i32 %arg0, %arg1
|
||||
|
@ -13,7 +13,7 @@ define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 {
|
|||
|
||||
; GCN-LABEL: {{^}}i32_fastcc_i32_i32_stack_object:
|
||||
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN: v_add_i32_e32 v0, vcc, v1, v
|
||||
; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v
|
||||
; GCN: s_mov_b32 s5, s32
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:24
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
|
@ -83,7 +83,7 @@ entry:
|
|||
; GCN-NEXT: s_mov_b32 s5, s32
|
||||
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s5 offset:4
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_add_i32_e32 v0, vcc, v1, v0
|
||||
; GCN-NEXT: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
define fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32* byval align 4 %arg1) #1 {
|
||||
%arg1.load = load i32, i32* %arg1, align 4
|
||||
|
@ -122,9 +122,9 @@ entry:
|
|||
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-DAG: buffer_load_dword [[LOAD_0:v[0-9]+]], off, s[0:3], s5 offset:4
|
||||
; GCN-DAG: buffer_load_dword [[LOAD_1:v[0-9]+]], off, s[0:3], s5 offset:8
|
||||
; GCN-DAG: v_add_i32_e32 v0, vcc, v1, v0
|
||||
; GCN: v_add_i32_e32 v0, vcc, [[LOAD_0]], v0
|
||||
; GCN: v_add_i32_e32 v0, vcc, [[LOAD_1]], v0
|
||||
; GCN-DAG: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v0
|
||||
; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, [[LOAD_0]], v0
|
||||
; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, [[LOAD_1]], v0
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %large) #1 {
|
||||
%val_firststack = extractvalue [32 x i32] %large, 30
|
||||
|
|
|
@ -17,9 +17,9 @@ define amdgpu_kernel void @s_abs_i32(i32 addrspace(1)* %out, i32 %val) nounwind
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_abs_i32:
|
||||
; GCN: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
|
||||
; GCN: v_sub_{{[iu]}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
|
||||
; GCN: v_max_i32_e32 {{v[0-9]+}}, [[SRC]], [[NEG]]
|
||||
; GCN: v_add_i32
|
||||
; GCN: v_add_{{[iu]}}32
|
||||
|
||||
; EG: MAX_INT
|
||||
define amdgpu_kernel void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
|
||||
|
@ -33,7 +33,7 @@ define amdgpu_kernel void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_abs_i32_repeat_user:
|
||||
; GCN: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
|
||||
; GCN: v_sub_{{[iu]}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
|
||||
; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[SRC]], [[NEG]]
|
||||
; GCN: v_mul_lo_i32 v{{[0-9]+}}, [[MAX]], [[MAX]]
|
||||
define amdgpu_kernel void @v_abs_i32_repeat_user(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
|
||||
|
@ -68,14 +68,14 @@ define amdgpu_kernel void @s_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_abs_v2i32:
|
||||
; GCN-DAG: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
|
||||
; GCN-DAG: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
|
||||
; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
|
||||
; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
|
||||
|
||||
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
|
||||
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]
|
||||
|
||||
; GCN: v_add_i32
|
||||
; GCN: v_add_i32
|
||||
; GCN: v_add_{{[iu]}}32
|
||||
; GCN: v_add_{{[iu]}}32
|
||||
|
||||
; EG: MAX_INT
|
||||
; EG: MAX_INT
|
||||
|
@ -127,20 +127,20 @@ define amdgpu_kernel void @s_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_abs_v4i32:
|
||||
; GCN-DAG: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
|
||||
; GCN-DAG: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
|
||||
; GCN-DAG: v_sub_i32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]]
|
||||
; GCN-DAG: v_sub_i32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]]
|
||||
; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
|
||||
; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
|
||||
; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]]
|
||||
; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]]
|
||||
|
||||
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
|
||||
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]
|
||||
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC2]], [[NEG2]]
|
||||
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC3]], [[NEG3]]
|
||||
|
||||
; GCN: v_add_i32
|
||||
; GCN: v_add_i32
|
||||
; GCN: v_add_i32
|
||||
; GCN: v_add_i32
|
||||
; GCN: v_add_{{[iu]}}32
|
||||
; GCN: v_add_{{[iu]}}32
|
||||
; GCN: v_add_{{[iu]}}32
|
||||
; GCN: v_add_{{[iu]}}32
|
||||
|
||||
; EG: MAX_INT
|
||||
; EG: MAX_INT
|
||||
|
|
|
@ -8,12 +8,12 @@
|
|||
; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]]
|
||||
; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2
|
||||
|
||||
; VI: v_sub_i32_e32
|
||||
; VI-DAG: v_sub_i32_e32
|
||||
; VI: v_sub_u32_e32
|
||||
; VI-DAG: v_sub_u32_e32
|
||||
; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), sext(v{{[0-9]+}}) dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
|
||||
; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), sext(v{{[0-9]+}}) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
|
||||
; VI: v_add_i32_e32
|
||||
; VI: v_add_i32_e32
|
||||
; VI: v_add_u32_e32
|
||||
; VI: v_add_u32_e32
|
||||
; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
|
||||
; CI: v_sub_i32_e32
|
||||
|
|
|
@ -204,7 +204,7 @@ main_body:
|
|||
|
||||
; GCN-LABEL: {{^}}smrd_vgpr_offset_imm_too_large:
|
||||
; GCN-NEXT: BB#
|
||||
; GCN-NEXT: v_add_i32_e32 v0, vcc, 0x1000, v0
|
||||
; GCN-NEXT: v_add_{{[_coiu]*}}32_e32 v0, vcc, 0x1000, v0
|
||||
; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen ;
|
||||
define amdgpu_ps float @smrd_vgpr_offset_imm_too_large(<4 x i32> inreg %desc, i32 %offset) #0 {
|
||||
main_body:
|
||||
|
|
|
@ -22,7 +22,7 @@ define amdgpu_kernel void @srem_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)*
|
|||
; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x92492493
|
||||
; SI: v_mul_hi_i32 {{v[0-9]+}}, {{v[0-9]+}}, [[MAGIC]]
|
||||
; SI: v_mul_lo_i32
|
||||
; SI: v_sub_i32
|
||||
; SI: v_sub_{{[iu]}}32
|
||||
; SI: s_endpgm
|
||||
define amdgpu_kernel void @srem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
|
||||
%num = load i32, i32 addrspace(1) * %in
|
||||
|
|
|
@ -51,7 +51,7 @@ define amdgpu_kernel void @s_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)*
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ssubo_i64:
|
||||
; SI: v_sub_i32_e32
|
||||
; SI: v_sub_{{[iu]}}32_e32
|
||||
; SI: v_subb_u32_e32
|
||||
define amdgpu_kernel void @v_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
|
||||
%a = load i64, i64 addrspace(1)* %aptr, align 4
|
||||
|
|
|
@ -98,7 +98,7 @@ entry:
|
|||
; GCN: s_waitcnt
|
||||
; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:4094
|
||||
|
||||
; VI-DAG: v_add_i32_e32
|
||||
; VI-DAG: v_add_u32_e32
|
||||
; VI-DAG: v_addc_u32_e32
|
||||
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
|
||||
|
||||
|
@ -119,7 +119,7 @@ entry:
|
|||
; GCN: s_waitcnt
|
||||
; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:-4096{{$}}
|
||||
|
||||
; VI-DAG: v_add_i32_e32
|
||||
; VI-DAG: v_add_u32_e32
|
||||
; VI-DAG: v_addc_u32_e32
|
||||
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
|
||||
|
||||
|
@ -139,7 +139,7 @@ entry:
|
|||
; GCN: s_waitcnt
|
||||
; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:4095
|
||||
|
||||
; VI-DAG: v_add_i32_e32
|
||||
; VI-DAG: v_add_u32_e32
|
||||
; VI-DAG: v_addc_u32_e32
|
||||
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; VI: flat_store_byte v[0:1], v{{[0-9]$}}
|
||||
|
@ -160,7 +160,7 @@ entry:
|
|||
; GCN: s_waitcnt
|
||||
; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:-4095
|
||||
|
||||
; VI-DAG: v_add_i32_e32
|
||||
; VI-DAG: v_add_u32_e32
|
||||
; VI-DAG: v_addc_u32_e32
|
||||
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
|
||||
|
||||
|
@ -272,7 +272,7 @@ entry:
|
|||
; GCN: s_waitcnt
|
||||
; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2 offset:4094{{$}}
|
||||
|
||||
; VI-DAG: v_add_i32_e32
|
||||
; VI-DAG: v_add_u32_e32
|
||||
; VI-DAG: v_addc_u32_e32
|
||||
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; VI: flat_store_short v[0:1], v2{{$}}
|
||||
|
@ -289,8 +289,9 @@ entry:
|
|||
|
||||
; GCN-LABEL: {{^}}store_flat_hi_v2i16_neg_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GCN: v_add_i32_e32
|
||||
; GCN: v_addc_u32_e32
|
||||
; GCN: v_add_{{[_cou]*}}32_e32
|
||||
; VI: v_addc_u32_e32
|
||||
; GFX9: v_addc_co_u32_e32
|
||||
|
||||
; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
|
||||
; VI: flat_store_short v[0:1], v2{{$}}
|
||||
|
@ -310,7 +311,7 @@ entry:
|
|||
; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2 offset:4095{{$}}
|
||||
|
||||
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; VI-DAG: v_add_i32_e32
|
||||
; VI-DAG: v_add_u32_e32
|
||||
; VI-DAG: v_addc_u32_e32
|
||||
; VI: flat_store_byte v[0:1], v2{{$}}
|
||||
; GCN-NEXT: s_waitcnt
|
||||
|
@ -327,8 +328,9 @@ entry:
|
|||
|
||||
; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_neg_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GCN-DAG: v_add_i32_e32
|
||||
; GCN-DAG: v_addc_u32_e32
|
||||
; GCN-DAG: v_add_{{[_cou]*}}32_e32
|
||||
; VI-DAG: v_addc_u32_e32
|
||||
; GFX9-DAG: v_addc_co_u32_e32
|
||||
|
||||
; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
|
||||
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
|
||||
|
|
|
@ -49,8 +49,8 @@ define amdgpu_kernel void @s_test_sub_self_v2i16(<2 x i16> addrspace(1)* %out, <
|
|||
; GCN-LABEL: {{^}}s_test_sub_v2i16_kernarg:
|
||||
; GFX9: v_pk_sub_i16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
||||
|
||||
; VI: v_subrev_i32_e32
|
||||
; VI: v_subrev_i32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
; VI: v_subrev_u32_e32
|
||||
; VI: v_subrev_u32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
define amdgpu_kernel void @s_test_sub_v2i16_kernarg(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #1 {
|
||||
%add = sub <2 x i16> %a, %b
|
||||
store <2 x i16> %add, <2 x i16> addrspace(1)* %out
|
||||
|
|
|
@ -22,7 +22,7 @@ define amdgpu_kernel void @s_uaddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64
|
|||
; FIXME: Could do scalar
|
||||
|
||||
; FUNC-LABEL: {{^}}s_uaddo_i32:
|
||||
; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
|
||||
|
||||
; EG: ADDC_UINT
|
||||
|
@ -37,7 +37,7 @@ define amdgpu_kernel void @s_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)*
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_uaddo_i32:
|
||||
; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
|
||||
|
||||
; EG: ADDC_UINT
|
||||
|
@ -58,7 +58,7 @@ define amdgpu_kernel void @v_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)*
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_uaddo_i32_novcc:
|
||||
; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
|
||||
|
||||
; EG: ADDC_UINT
|
||||
|
@ -95,7 +95,7 @@ define amdgpu_kernel void @s_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)*
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_uaddo_i64:
|
||||
; GCN: v_add_i32
|
||||
; GCN: v_add_{{[iu]}}32
|
||||
; GCN: v_addc_u32
|
||||
|
||||
; EG: ADDC_UINT
|
||||
|
|
|
@ -30,24 +30,24 @@
|
|||
; SI: v_rcp_iflag_f32_e32 [[RCP:v[0-9]+]]
|
||||
; SI-DAG: v_mul_hi_u32 [[RCP_HI:v[0-9]+]], [[RCP]]
|
||||
; SI-DAG: v_mul_lo_i32 [[RCP_LO:v[0-9]+]], [[RCP]]
|
||||
; SI-DAG: v_sub_i32_e32 [[NEG_RCP_LO:v[0-9]+]], vcc, 0, [[RCP_LO]]
|
||||
; SI-DAG: v_sub_{{[iu]}}32_e32 [[NEG_RCP_LO:v[0-9]+]], vcc, 0, [[RCP_LO]]
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_mul_hi_u32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]]
|
||||
; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], vcc, [[E]], [[RCP]]
|
||||
; SI-DAG: v_subrev_i32_e32 [[RCP_S_E:v[0-9]+]], vcc, [[E]], [[RCP]]
|
||||
; SI-DAG: v_add_{{[iu]}}32_e32 [[RCP_A_E:v[0-9]+]], vcc, [[E]], [[RCP]]
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32 [[RCP_S_E:v[0-9]+]], vcc, [[E]], [[RCP]]
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]]
|
||||
; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]]
|
||||
; SI-DAG: v_add_i32_e32 [[Quotient_A_One:v[0-9]+]], vcc, 1, [[Quotient]]
|
||||
; SI-DAG: v_sub_i32_e32 [[Remainder:v[0-9]+]], vcc, {{[vs][0-9]+}}, [[Num_S_Remainder]]
|
||||
; SI-DAG: v_add_{{[iu]}}32_e32 [[Quotient_A_One:v[0-9]+]], vcc, 1, [[Quotient]]
|
||||
; SI-DAG: v_sub_{{[iu]}}32_e32 [[Remainder:v[0-9]+]], vcc, {{[vs][0-9]+}}, [[Num_S_Remainder]]
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_subrev_i32_e32 [[Quotient_S_One:v[0-9]+]],
|
||||
; SI-DAG: v_subrev_i32_e32 [[Remainder_S_Den:v[0-9]+]],
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32 [[Quotient_S_One:v[0-9]+]],
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32 [[Remainder_S_Den:v[0-9]+]],
|
||||
; SI: v_and_b32_e32 [[Tmp1:v[0-9]+]]
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_add_i32_e32 [[Remainder_A_Den:v[0-9]+]],
|
||||
; SI-DAG: v_add_{{[iu]}}32_e32 [[Remainder_A_Den:v[0-9]+]],
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI: s_endpgm
|
||||
|
@ -114,47 +114,47 @@ define amdgpu_kernel void @test_udivrem(i32 addrspace(1)* %out0, i32 addrspace(1
|
|||
; SI-DAG: v_rcp_iflag_f32_e32
|
||||
; SI-DAG: v_mul_hi_u32
|
||||
; SI-DAG: v_mul_lo_i32
|
||||
; SI-DAG: v_sub_i32_e32
|
||||
; SI-DAG: v_sub_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_mul_hi_u32
|
||||
; SI-DAG: v_add_i32_e32
|
||||
; SI-DAG: v_subrev_i32_e32
|
||||
; SI-DAG: v_add_{{[iu]}}32_e32
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_mul_hi_u32
|
||||
; SI-DAG: v_mul_lo_i32
|
||||
; SI-DAG: v_subrev_i32_e32
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_and_b32_e32
|
||||
; SI-DAG: v_add_i32_e32
|
||||
; SI-DAG: v_subrev_i32_e32
|
||||
; SI-DAG: v_add_{{[iu]}}32_e32
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_add_i32_e32
|
||||
; SI-DAG: v_subrev_i32_e32
|
||||
; SI-DAG: v_add_{{[iu]}}32_e32
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_rcp_iflag_f32_e32
|
||||
; SI-DAG: v_mul_hi_u32
|
||||
; SI-DAG: v_mul_lo_i32
|
||||
; SI-DAG: v_sub_i32_e32
|
||||
; SI-DAG: v_sub_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_mul_hi_u32
|
||||
; SI-DAG: v_add_i32_e32
|
||||
; SI-DAG: v_subrev_i32_e32
|
||||
; SI-DAG: v_add_{{[iu]}}32_e32
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_mul_hi_u32
|
||||
; SI-DAG: v_mul_lo_i32
|
||||
; SI-DAG: v_subrev_i32_e32
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_and_b32_e32
|
||||
; SI-DAG: v_add_i32_e32
|
||||
; SI-DAG: v_subrev_i32_e32
|
||||
; SI-DAG: v_add_{{[iu]}}32_e32
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_add_i32_e32
|
||||
; SI-DAG: v_subrev_i32_e32
|
||||
; SI-DAG: v_add_{{[iu]}}32_e32
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI: s_endpgm
|
||||
|
@ -264,80 +264,80 @@ define amdgpu_kernel void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i3
|
|||
; SI-DAG: v_rcp_iflag_f32_e32
|
||||
; SI-DAG: v_mul_hi_u32
|
||||
; SI-DAG: v_mul_lo_i32
|
||||
; SI-DAG: v_sub_i32_e32
|
||||
; SI-DAG: v_sub_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_mul_hi_u32
|
||||
; SI-DAG: v_add_i32_e32
|
||||
; SI-DAG: v_subrev_i32_e32
|
||||
; SI-DAG: v_add_{{[iu]}}32_e32
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_mul_hi_u32
|
||||
; SI-DAG: v_mul_lo_i32
|
||||
; SI-DAG: v_subrev_i32_e32
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_and_b32_e32
|
||||
; SI-DAG: v_add_i32_e32
|
||||
; SI-DAG: v_subrev_i32_e32
|
||||
; SI-DAG: v_add_{{[iu]}}32_e32
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_add_i32_e32
|
||||
; SI-DAG: v_subrev_i32_e32
|
||||
; SI-DAG: v_add_{{[iu]}}32_e32
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_rcp_iflag_f32_e32
|
||||
; SI-DAG: v_mul_hi_u32
|
||||
; SI-DAG: v_mul_lo_i32
|
||||
; SI-DAG: v_sub_i32_e32
|
||||
; SI-DAG: v_sub_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_mul_hi_u32
|
||||
; SI-DAG: v_add_i32_e32
|
||||
; SI-DAG: v_subrev_i32_e32
|
||||
; SI-DAG: v_add_{{[iu]}}32_e32
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_mul_hi_u32
|
||||
; SI-DAG: v_mul_lo_i32
|
||||
; SI-DAG: v_subrev_i32_e32
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_and_b32_e32
|
||||
; SI-DAG: v_add_i32_e32
|
||||
; SI-DAG: v_subrev_i32_e32
|
||||
; SI-DAG: v_add_{{[iu]}}32_e32
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_add_i32_e32
|
||||
; SI-DAG: v_subrev_i32_e32
|
||||
; SI-DAG: v_add_{{[iu]}}32_e32
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_rcp_iflag_f32_e32
|
||||
; SI-DAG: v_mul_hi_u32
|
||||
; SI-DAG: v_mul_lo_i32
|
||||
; SI-DAG: v_sub_i32_e32
|
||||
; SI-DAG: v_sub_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_mul_hi_u32
|
||||
; SI-DAG: v_add_i32_e32
|
||||
; SI-DAG: v_subrev_i32_e32
|
||||
; SI-DAG: v_add_{{[iu]}}32_e32
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_mul_hi_u32
|
||||
; SI-DAG: v_mul_lo_i32
|
||||
; SI-DAG: v_subrev_i32_e32
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_and_b32_e32
|
||||
; SI-DAG: v_add_i32_e32
|
||||
; SI-DAG: v_subrev_i32_e32
|
||||
; SI-DAG: v_add_{{[iu]}}32_e32
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_add_i32_e32
|
||||
; SI-DAG: v_subrev_i32_e32
|
||||
; SI-DAG: v_add_{{[iu]}}32_e32
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_rcp_iflag_f32_e32
|
||||
; SI-DAG: v_mul_hi_u32
|
||||
; SI-DAG: v_mul_lo_i32
|
||||
; SI-DAG: v_sub_i32_e32
|
||||
; SI-DAG: v_sub_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI-DAG: v_mul_hi_u32
|
||||
; SI-DAG: v_add_i32_e32
|
||||
; SI-DAG: v_subrev_i32_e32
|
||||
; SI-DAG: v_add_{{[iu]}}32_e32
|
||||
; SI-DAG: v_subrev_{{[iu]}}32_e32
|
||||
; SI-DAG: v_cndmask_b32_e64
|
||||
; SI: s_endpgm
|
||||
define amdgpu_kernel void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) {
|
||||
|
|
|
@ -21,7 +21,7 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
|
|||
; GCN-DAG: v_cmp_eq_u64
|
||||
; GCN-DAG: v_cmp_gt_u64
|
||||
|
||||
; GCN: v_add_i32_e32 [[VR:v[0-9]+]]
|
||||
; GCN: v_add_{{[iu]}}32_e32 [[VR:v[0-9]+]]
|
||||
; GCN: v_cvt_f16_f32_e32 [[VR_F16:v[0-9]+]], [[VR]]
|
||||
; GCN: {{buffer|flat}}_store_short {{.*}}[[VR_F16]]
|
||||
define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
|
||||
|
@ -52,7 +52,7 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
|
|||
; GCN-DAG: v_cmp_eq_u64
|
||||
; GCN-DAG: v_cmp_gt_u64
|
||||
|
||||
; GCN: v_add_i32_e32 [[VR:v[0-9]+]]
|
||||
; GCN: v_add_{{[iu]}}32_e32 [[VR:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_store_dword {{.*}}[[VR]]
|
||||
define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x()
|
||||
|
|
|
@ -560,7 +560,7 @@ done:
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}move_to_valu_vgpr_operand_phi:
|
||||
; GCN: v_add_i32_e32
|
||||
; GCN: v_add_{{[iu]}}32_e32
|
||||
; GCN: ds_write_b32
|
||||
define void @move_to_valu_vgpr_operand_phi(i32 addrspace(3)* %out) {
|
||||
bb0:
|
||||
|
|
|
@ -21,9 +21,9 @@ define amdgpu_kernel void @test_urem_i32(i32 addrspace(1)* %out, i32 addrspace(1
|
|||
; FUNC-LABEL: {{^}}test_urem_i32_7:
|
||||
; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x24924925
|
||||
; SI: v_mul_hi_u32 [[MAGIC]], {{v[0-9]+}}
|
||||
; SI: v_subrev_i32
|
||||
; SI: v_subrev_{{[iu]}}32
|
||||
; SI: v_mul_lo_i32
|
||||
; SI: v_sub_i32
|
||||
; SI: v_sub_{{[iu]}}32
|
||||
; SI: buffer_store_dword
|
||||
; SI: s_endpgm
|
||||
define amdgpu_kernel void @test_urem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
|
||||
|
|
|
@ -22,7 +22,7 @@ define amdgpu_kernel void @s_usubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64
|
|||
; FIXME: Could do scalar
|
||||
|
||||
; FUNC-LABEL: {{^}}s_usubo_i32:
|
||||
; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
|
||||
|
||||
; EG-DAG: SUBB_UINT
|
||||
|
@ -37,7 +37,7 @@ define amdgpu_kernel void @s_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)*
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_usubo_i32:
|
||||
; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
|
||||
|
||||
; EG-DAG: SUBB_UINT
|
||||
|
@ -58,7 +58,7 @@ define amdgpu_kernel void @v_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)*
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_usubo_i32_novcc:
|
||||
; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
|
||||
|
||||
; EG-DAG: SUBB_UINT
|
||||
|
@ -97,7 +97,7 @@ define amdgpu_kernel void @s_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)*
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_usubo_i64:
|
||||
; GCN: v_sub_i32
|
||||
; GCN: v_sub_{{[iu]}}32
|
||||
; GCN: v_subb_u32
|
||||
|
||||
; EG-DAG: SUBB_UINT
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
; Test that we correctly commute a sub instruction
|
||||
; FUNC-LABEL: {{^}}sub_rev:
|
||||
; SI-NOT: v_sub_i32_e32 v{{[0-9]+}}, vcc, s
|
||||
; SI: v_subrev_i32_e32 v{{[0-9]+}}, vcc, s
|
||||
; SI: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, s
|
||||
|
||||
; ModuleID = 'vop-shrink.ll'
|
||||
|
||||
|
|
|
@ -169,7 +169,7 @@ main_body:
|
|||
;CHECK: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1
|
||||
;CHECK: buffer_load_dword
|
||||
;CHECK: buffer_load_dword
|
||||
;CHECK: v_add_i32_e32
|
||||
;CHECK: v_add_{{[iu]}}32_e32
|
||||
define amdgpu_ps float @test_wwm2(i32 inreg %idx0, i32 inreg %idx1) {
|
||||
main_body:
|
||||
%src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
|
||||
|
@ -303,7 +303,7 @@ endif:
|
|||
;CHECK: v_mov_b32_e32
|
||||
;CHECK: s_not_b64 exec, exec
|
||||
;CHECK: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1
|
||||
;CHECK: v_add_i32_e32
|
||||
;CHECK: v_add_{{[iu]}}32_e32
|
||||
define amdgpu_ps void @test_set_inactive1(i32 inreg %idx) {
|
||||
main_body:
|
||||
%src = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0)
|
||||
|
|
|
@ -1,104 +1,94 @@
|
|||
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck -check-prefixes=GCN,VI %s
|
||||
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefixes=GFX9 %s
|
||||
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck -check-prefixes=ERR-SICIVI %s
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefixes=ERR-SICIVI %s
|
||||
// FIXME: pre-gfx9 errors should be more useful
|
||||
|
||||
|
||||
// FIXME: These should parse to VOP2 encoding
|
||||
v_add_u32 v1, v2, v3
|
||||
// GFX9: v_add_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x34,0xd1,0x02,0x07,0x02,0x00]
|
||||
// ERR-SICIVI: :15: error: invalid operand for instruction
|
||||
// GFX9: v_add_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x68]
|
||||
// ERR-SICIVI: error: instruction not supported on this GPU
|
||||
|
||||
v_add_u32 v1, v2, s1
|
||||
// GFX9: v_add_u32_e64 v1, v2, s1 ; encoding: [0x01,0x00,0x34,0xd1,0x02,0x03,0x00,0x00]
|
||||
// ERR-SICIVI: :15: error: invalid operand for instruction
|
||||
// ERR-SICIVI: error: instruction not supported on this GPU
|
||||
|
||||
v_add_u32 v1, s1, v2
|
||||
// GFX9: v_add_u32_e64 v1, s1, v2 ; encoding: [0x01,0x00,0x34,0xd1,0x01,0x04,0x02,0x00]
|
||||
// ERR-SICIVI: :15: error: invalid operand for instruction
|
||||
// GFX9: v_add_u32_e32 v1, s1, v2 ; encoding: [0x01,0x04,0x02,0x68]
|
||||
// ERR-SICIVI: error: instruction not supported on this GPU
|
||||
|
||||
v_add_u32 v1, 4.0, v2
|
||||
// GFX9: v_add_u32_e64 v1, 4.0, v2 ; encoding: [0x01,0x00,0x34,0xd1,0xf6,0x04,0x02,0x00]
|
||||
// ERR-SICIVI: :15: error: invalid operand for instruction
|
||||
// GFX9: v_add_u32_e32 v1, 4.0, v2 ; encoding: [0xf6,0x04,0x02,0x68]
|
||||
// ERR-SICIVI: error: instruction not supported on this GPU
|
||||
|
||||
v_add_u32 v1, v2, 4.0
|
||||
// GFX9: v_add_u32_e64 v1, v2, 4.0 ; encoding: [0x01,0x00,0x34,0xd1,0x02,0xed,0x01,0x00]
|
||||
// ERR-SICIVI: :15: error: invalid operand for instruction
|
||||
// ERR-SICIVI: error: instruction not supported on this GPU
|
||||
|
||||
v_add_u32_e32 v1, v2, v3
|
||||
// GFX9: v_add_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x68]
|
||||
// ERR-SICIVI: :19: error: invalid operand for instruction
|
||||
// ERR-SICIVI: error: instruction not supported on this GPU
|
||||
|
||||
v_add_u32_e32 v1, s1, v3
|
||||
// GFX9: v_add_u32_e32 v1, s1, v3 ; encoding: [0x01,0x06,0x02,0x68]
|
||||
// ERR-SICIVI: :19: error: invalid operand for instruction
|
||||
// ERR-SICIVI: error: instruction not supported on this GPU
|
||||
|
||||
|
||||
|
||||
v_sub_u32 v1, v2, v3
|
||||
// GFX9: v_sub_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x35,0xd1,0x02,0x07,0x02,0x00]
|
||||
// ERR-SICIVI: :15: error: invalid operand for instruction
|
||||
// GFX9: v_sub_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x6a]
|
||||
// ERR-SICIVI: error: instruction not supported on this GPU
|
||||
|
||||
v_sub_u32 v1, v2, s1
|
||||
// GFX9: v_sub_u32_e64 v1, v2, s1 ; encoding: [0x01,0x00,0x35,0xd1,0x02,0x03,0x00,0x00]
|
||||
// ERR-SICIVI: :15: error: invalid operand for instruction
|
||||
// ERR-SICIVI: error: instruction not supported on this GPU
|
||||
|
||||
v_sub_u32 v1, s1, v2
|
||||
// GFX9: v_sub_u32_e64 v1, s1, v2 ; encoding: [0x01,0x00,0x35,0xd1,0x01,0x04,0x02,0x00]
|
||||
// ERR-SICIVI: :15: error: invalid operand for instruction
|
||||
// GFX9: v_sub_u32_e32 v1, s1, v2 ; encoding: [0x01,0x04,0x02,0x6a]
|
||||
// ERR-SICIVI: error: instruction not supported on this GPU
|
||||
|
||||
v_sub_u32 v1, 4.0, v2
|
||||
// GFX9: v_sub_u32_e64 v1, 4.0, v2 ; encoding: [0x01,0x00,0x35,0xd1,0xf6,0x04,0x02,0x00]
|
||||
// ERR-SICIVI: :15: error: invalid operand for instruction
|
||||
// GFX9: v_sub_u32_e32 v1, 4.0, v2 ; encoding: [0xf6,0x04,0x02,0x6a]
|
||||
// ERR-SICIVI: error: instruction not supported on this GPU
|
||||
|
||||
v_sub_u32 v1, v2, 4.0
|
||||
// GFX9: v_sub_u32_e64 v1, v2, 4.0 ; encoding: [0x01,0x00,0x35,0xd1,0x02,0xed,0x01,0x00]
|
||||
// ERR-SICIVI: :15: error: invalid operand for instruction
|
||||
// ERR-SICIVI: error: instruction not supported on this GPU
|
||||
|
||||
v_sub_u32_e32 v1, v2, v3
|
||||
// GFX9: v_sub_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x6a]
|
||||
// ERR-SICIVI: :19: error: invalid operand for instruction
|
||||
// ERR-SICIVI: error: instruction not supported on this GPU
|
||||
|
||||
v_sub_u32_e32 v1, s1, v3
|
||||
// GFX9: v_sub_u32_e32 v1, s1, v3 ; encoding: [0x01,0x06,0x02,0x6a]
|
||||
// ERR-SICIVI: :19: error: invalid operand for instruction
|
||||
// ERR-SICIVI: error: instruction not supported on this GPU
|
||||
|
||||
|
||||
|
||||
v_subrev_u32 v1, v2, v3
|
||||
// GFX9: v_subrev_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x36,0xd1,0x02,0x07,0x02,0x00]
|
||||
// ERR-SICIVI: :18: error: invalid operand for instruction
|
||||
// GFX9: v_subrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x6c]
|
||||
// ERR-SICIVI: error: instruction not supported on this GPU
|
||||
|
||||
v_subrev_u32 v1, v2, s1
|
||||
// GFX9: v_subrev_u32_e64 v1, v2, s1 ; encoding: [0x01,0x00,0x36,0xd1,0x02,0x03,0x00,0x00]
|
||||
// ERR-SICIVI: :18: error: invalid operand for instruction
|
||||
// ERR-SICIVI: error: instruction not supported on this GPU
|
||||
|
||||
v_subrev_u32 v1, s1, v2
|
||||
// GFX9: v_subrev_u32_e64 v1, s1, v2 ; encoding: [0x01,0x00,0x36,0xd1,0x01,0x04,0x02,0x00]
|
||||
// ERR-SICIVI: :18: error: invalid operand for instruction
|
||||
// GFX9: v_subrev_u32_e32 v1, s1, v2 ; encoding: [0x01,0x04,0x02,0x6c]
|
||||
// ERR-SICIVI: error: instruction not supported on this GPU
|
||||
|
||||
v_subrev_u32 v1, 4.0, v2
|
||||
// GFX9: v_subrev_u32_e64 v1, 4.0, v2 ; encoding: [0x01,0x00,0x36,0xd1,0xf6,0x04,0x02,0x00]
|
||||
// ERR-SICIVI: :18: error: invalid operand for instruction
|
||||
// GFX9: v_subrev_u32_e32 v1, 4.0, v2 ; encoding: [0xf6,0x04,0x02,0x6c]
|
||||
// ERR-SICIVI: error: instruction not supported on this GPU
|
||||
|
||||
v_subrev_u32 v1, v2, 4.0
|
||||
// GFX9: v_subrev_u32_e64 v1, v2, 4.0 ; encoding: [0x01,0x00,0x36,0xd1,0x02,0xed,0x01,0x00]
|
||||
// ERR-SICIVI: :18: error: invalid operand for instruction
|
||||
// ERR-SICIVI: error: instruction not supported on this GPU
|
||||
|
||||
v_subrev_u32_e32 v1, v2, v3
|
||||
// GFX9: v_subrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x6c]
|
||||
// ERR-SICIVI: :22: error: invalid operand for instruction
|
||||
// ERR-SICIVI: error: instruction not supported on this GPU
|
||||
|
||||
v_subrev_u32_e32 v1, s1, v3
|
||||
// GFX9: v_subrev_u32_e32 v1, s1, v3 ; encoding: [0x01,0x06,0x02,0x6c]
|
||||
// ERR-SICIVI: :22: error: invalid operand for instruction
|
||||
|
||||
|
||||
|
||||
v_add_u32 v1, vcc, v2, v3
|
||||
// GCN: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
|
||||
|
||||
v_add_u32 v1, s[0:1], v2, v3
|
||||
// GCN: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
|
||||
// ERR-SICIVI: error: instruction not supported on this GPU
|
||||
|
|
|
@ -24,11 +24,3 @@ v_mov_b32_e32 v0, 0.5
|
|||
|
||||
v_mov_b32_e32 v0, 3.125
|
||||
// GCN: v_mov_b32_e32 v0, 0x40480000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x00,0x48,0x40]
|
||||
|
||||
v_add_i32 v0, vcc, 0.5, v0
|
||||
// SICI: v_add_i32_e32 v0, vcc, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x4a]
|
||||
// VI: v_add_i32_e32 v0, vcc, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x32]
|
||||
|
||||
v_add_i32 v0, vcc, 3.125, v0
|
||||
// SICI: v_add_i32_e32 v0, vcc, 0x40480000, v0 ; encoding: [0xff,0x00,0x00,0x4a,0x00,0x00,0x48,0x40]
|
||||
// VI: v_add_i32_e32 v0, vcc, 0x40480000, v0 ; encoding: [0xff,0x00,0x00,0x32,0x00,0x00,0x48,0x40]
|
|
@ -95,11 +95,11 @@ v_mul_i32_i24_e64 v1, s2, 3
|
|||
v_mul_i32_i24_e64 v1, 3, s3
|
||||
|
||||
// SICI: v_add_i32_e32 v0, vcc, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x4a]
|
||||
// VI: v_add_i32_e32 v0, vcc, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x32]
|
||||
// NOVI: error: instruction not supported on this GPU
|
||||
v_add_i32_e32 v0, vcc, 0.5, v0
|
||||
|
||||
// SICI: v_add_i32_e32 v0, vcc, 0x40480000, v0 ; encoding: [0xff,0x00,0x00,0x4a,0x00,0x00,0x48,0x40]
|
||||
// VI: v_add_i32_e32 v0, vcc, 0x40480000, v0 ; encoding: [0xff,0x00,0x00,0x32,0x00,0x00,0x48,0x40]
|
||||
// NOVI: error: instruction not supported on this GPU
|
||||
v_add_i32_e32 v0, vcc, 3.125, v0
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -271,59 +271,59 @@ v_mbcnt_lo_u32_b32_e64 v1, v2, v3
|
|||
v_mbcnt_hi_u32_b32_e64 v1, v2, v3
|
||||
|
||||
// SICI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
|
||||
// VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
|
||||
// NOVI: error: instruction not supported on this GPU
|
||||
v_add_i32_e32 v1, vcc, v2, v3
|
||||
|
||||
// SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
|
||||
// NOVI: error: instruction not supported on this GPU
|
||||
v_add_i32 v1, s[0:1], v2, v3
|
||||
|
||||
// SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
|
||||
// NOVI: error: instruction not supported on this GPU
|
||||
v_add_i32_e64 v1, s[0:1], v2, v3
|
||||
|
||||
// SICI: v_add_i32_e64 v1, vcc, v2, v3 ; encoding: [0x01,0x6a,0x4a,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_add_i32_e64 v1, vcc, v2, v3 ; encoding: [0x01,0x6a,0x19,0xd1,0x02,0x07,0x02,0x00]
|
||||
// NOVI: error: instruction not supported on this GPU
|
||||
v_add_i32_e64 v1, vcc, v2, v3
|
||||
|
||||
// SICI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
|
||||
// VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
|
||||
// NOSICI: error: instruction not supported on this GPU
|
||||
// VI: v_add_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
|
||||
v_add_u32 v1, vcc, v2, v3
|
||||
|
||||
// SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
|
||||
// NOSICI: error: instruction not supported on this GPU
|
||||
// VI: v_add_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
|
||||
v_add_u32 v1, s[0:1], v2, v3
|
||||
|
||||
// SICI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
|
||||
// VI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
|
||||
// NOVI: error: instruction not supported on this GPU
|
||||
v_sub_i32 v1, vcc, v2, v3
|
||||
|
||||
// SICI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
|
||||
// NOVI: error: instruction not supported on this GPU
|
||||
v_sub_i32 v1, s[0:1], v2, v3
|
||||
|
||||
// SICI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
|
||||
// VI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
|
||||
// NOSICI: error: instruction not supported on this GPU
|
||||
// VI: v_sub_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
|
||||
v_sub_u32 v1, vcc, v2, v3
|
||||
|
||||
// SICI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
|
||||
// NOSICI: error: instruction not supported on this GPU
|
||||
// VI: v_sub_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
|
||||
v_sub_u32 v1, s[0:1], v2, v3
|
||||
|
||||
// SICI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
|
||||
// VI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
|
||||
// NOVI: error: instruction not supported on this GPU
|
||||
v_subrev_i32 v1, vcc, v2, v3
|
||||
|
||||
// SICI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4e,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
|
||||
// NOVI: error: instruction not supported on this GPU
|
||||
v_subrev_i32 v1, s[0:1], v2, v3
|
||||
|
||||
// SICI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
|
||||
// VI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
|
||||
// NOSICI: error: instruction not supported on this GPU
|
||||
// VI: v_subrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
|
||||
v_subrev_u32 v1, vcc, v2, v3
|
||||
|
||||
// SICI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4e,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
|
||||
// NOSICI: error: instruction not supported on this GPU
|
||||
// VI: v_subrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
|
||||
v_subrev_u32 v1, s[0:1], v2, v3
|
||||
|
||||
// SICI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x50]
|
||||
|
|
|
@ -509,29 +509,65 @@ v_min_i16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
|||
v_ldexp_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
|
||||
// NOSICI: error:
|
||||
// VI9: v_add_i32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x32,0x02,0x01,0x09,0xa1]
|
||||
v_add_i32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
// NOGFX9: error:
|
||||
// VI: v_add_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x32,0x02,0x01,0x09,0xa1]
|
||||
v_add_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
|
||||
// NOSICI: error:
|
||||
// VI9: v_sub_i32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x34,0x02,0x01,0x09,0xa1]
|
||||
v_sub_i32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
// NOGFX9: error:
|
||||
// VI: v_sub_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x34,0x02,0x01,0x09,0xa1]
|
||||
v_sub_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
|
||||
// NOSICI: error:
|
||||
// VI9: v_subrev_i32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x36,0x02,0x01,0x09,0xa1]
|
||||
v_subrev_i32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
// NOGFX9: error:
|
||||
// VI: v_subrev_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x36,0x02,0x01,0x09,0xa1]
|
||||
v_subrev_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
|
||||
// NOSICI: error:
|
||||
// VI9: v_addc_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x38,0x02,0x01,0x09,0xa1]
|
||||
// NOGFX9: error:
|
||||
// VI: v_addc_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x38,0x02,0x01,0x09,0xa1]
|
||||
v_addc_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
|
||||
// NOSICI: error:
|
||||
// VI9: v_subb_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3a,0x02,0x01,0x09,0xa1]
|
||||
// NOGFX9: error:
|
||||
// VI: v_subb_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3a,0x02,0x01,0x09,0xa1]
|
||||
v_subb_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
|
||||
// NOSICI: error:
|
||||
// VI9: v_subbrev_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3c,0x02,0x01,0x09,0xa1]
|
||||
// NOGFX9: error:
|
||||
// VI: v_subbrev_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3c,0x02,0x01,0x09,0xa1]
|
||||
v_subbrev_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
|
||||
// NOSICI: error:
|
||||
// NOVI: error:
|
||||
// GFX9: v_add_co_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x32,0x02,0x01,0x09,0xa1]
|
||||
v_add_co_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
|
||||
// NOSICI: error:
|
||||
// NOVI: error:
|
||||
// GFX9: v_sub_co_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x34,0x02,0x01,0x09,0xa1]
|
||||
v_sub_co_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
|
||||
// NOSICI: error:
|
||||
// NOVI: error:
|
||||
// GFX9: v_subrev_co_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x36,0x02,0x01,0x09,0xa1]
|
||||
v_subrev_co_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
|
||||
// NOSICI: error:
|
||||
// NOVI: error:
|
||||
// GFX9: v_addc_co_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x38,0x02,0x01,0x09,0xa1]
|
||||
v_addc_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
|
||||
// NOSICI: error:
|
||||
// NOVI: error:
|
||||
// GFX9: v_subb_co_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3a,0x02,0x01,0x09,0xa1]
|
||||
v_subb_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
|
||||
// NOSICI: error:
|
||||
// NOVI: error:
|
||||
// GFX9: v_subbrev_co_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3c,0x02,0x01,0x09,0xa1]
|
||||
v_subbrev_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Check that immediates and scalar regs are not supported
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -497,29 +497,65 @@ v_min_i16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_se
|
|||
v_ldexp_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
|
||||
|
||||
// NOSICI: error:
|
||||
// GFX89: v_add_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x32,0x02,0x06,0x05,0x02]
|
||||
v_add_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
|
||||
// NOGFX9: error:
|
||||
// VI: v_add_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x32,0x02,0x06,0x05,0x02]
|
||||
v_add_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
|
||||
|
||||
// NOSICI: error:
|
||||
// GFX89: v_sub_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x34,0x02,0x06,0x05,0x02]
|
||||
v_sub_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
|
||||
// NOGFX9: error:
|
||||
// VI: v_sub_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x34,0x02,0x06,0x05,0x02]
|
||||
v_sub_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
|
||||
|
||||
// NOSICI: error:
|
||||
// GFX89: v_subrev_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x36,0x02,0x06,0x05,0x02]
|
||||
v_subrev_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
|
||||
// NOGFX9: error:
|
||||
// VI: v_subrev_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x36,0x02,0x06,0x05,0x02]
|
||||
v_subrev_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
|
||||
|
||||
// NOSICI: error:
|
||||
// GFX89: v_addc_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x38,0x02,0x06,0x05,0x02]
|
||||
// NOGFX9: error:
|
||||
// VI: v_addc_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x38,0x02,0x06,0x05,0x02]
|
||||
v_addc_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
|
||||
|
||||
// NOSICI: error:
|
||||
// GFX89: v_subb_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3a,0x02,0x06,0x05,0x02]
|
||||
// NOGFX9: error:
|
||||
// VI: v_subb_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3a,0x02,0x06,0x05,0x02]
|
||||
v_subb_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
|
||||
|
||||
// NOSICI: error:
|
||||
// GFX89: v_subbrev_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02]
|
||||
// NOGFX9: error:
|
||||
// VI: v_subbrev_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02]
|
||||
v_subbrev_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
|
||||
|
||||
// NOSICI: error:
|
||||
// NOVI: error:
|
||||
// GFX9: v_add_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x32,0x02,0x06,0x05,0x02]
|
||||
v_add_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
|
||||
|
||||
// NOSICI: error:
|
||||
// NOVI: error:
|
||||
// GFX9: v_sub_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x34,0x02,0x06,0x05,0x02]
|
||||
v_sub_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
|
||||
|
||||
// NOSICI: error:
|
||||
// NOVI: error:
|
||||
// GFX9: v_subrev_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x36,0x02,0x06,0x05,0x02]
|
||||
v_subrev_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
|
||||
|
||||
// NOSICI: error:
|
||||
// NOVI: error:
|
||||
// GFX9: v_addc_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x38,0x02,0x06,0x05,0x02]
|
||||
v_addc_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
|
||||
|
||||
// NOSICI: error:
|
||||
// NOVI: error:
|
||||
// GFX9: v_subb_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3a,0x02,0x06,0x05,0x02]
|
||||
v_subb_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
|
||||
|
||||
// NOSICI: error:
|
||||
// NOVI: error:
|
||||
// GFX9: v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02]
|
||||
v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Check VOPC opcodes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -93,46 +93,46 @@
|
|||
# VI: v_mbcnt_hi_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00]
|
||||
0x01 0x00 0x8d 0xd2 0x02 0x07 0x02 0x00
|
||||
|
||||
# VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
|
||||
# VI: v_add_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
|
||||
0x02 0x07 0x02 0x32
|
||||
|
||||
# VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
|
||||
# VI: v_add_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
|
||||
0x01 0x00 0x19 0xd1 0x02 0x07 0x02 0x00
|
||||
|
||||
# VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
|
||||
# VI: v_add_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
|
||||
0x01 0x00 0x19 0xd1 0x02 0x07 0x02 0x00
|
||||
|
||||
# VI: v_add_i32_e64 v1, vcc, v2, v3 ; encoding: [0x01,0x6a,0x19,0xd1,0x02,0x07,0x02,0x00]
|
||||
# VI: v_add_u32_e64 v1, vcc, v2, v3 ; encoding: [0x01,0x6a,0x19,0xd1,0x02,0x07,0x02,0x00]
|
||||
0x01 0x6a 0x19 0xd1 0x02 0x07 0x02 0x00
|
||||
|
||||
# VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
|
||||
# VI: v_add_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
|
||||
0x02 0x07 0x02 0x32
|
||||
|
||||
# VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
|
||||
# VI: v_add_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
|
||||
0x01 0x00 0x19 0xd1 0x02 0x07 0x02 0x00
|
||||
|
||||
# VI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
|
||||
# VI: v_sub_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
|
||||
0x02 0x07 0x02 0x34
|
||||
|
||||
# VI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
|
||||
# VI: v_sub_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
|
||||
0x01 0x00 0x1a 0xd1 0x02 0x07 0x02 0x00
|
||||
|
||||
# VI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
|
||||
# VI: v_sub_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
|
||||
0x02 0x07 0x02 0x34
|
||||
|
||||
# VI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
|
||||
# VI: v_sub_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
|
||||
0x01 0x00 0x1a 0xd1 0x02 0x07 0x02 0x00
|
||||
|
||||
# VI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
|
||||
# VI: v_subrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
|
||||
0x02 0x07 0x02 0x36
|
||||
|
||||
# VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
|
||||
# VI: v_subrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
|
||||
0x01 0x00 0x1b 0xd1 0x02 0x07 0x02 0x00
|
||||
|
||||
# VI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
|
||||
# VI: v_subrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
|
||||
0x02 0x07 0x02 0x36
|
||||
|
||||
# VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
|
||||
# VI: v_subrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
|
||||
0x01 0x00 0x1b 0xd1 0x02 0x07 0x02 0x00
|
||||
|
||||
# VI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x38]
|
||||
|
|
|
@ -12,7 +12,7 @@ hello_world:
|
|||
s_waitcnt lgkmcnt(0)
|
||||
s_add_u32 s0, s7, s0
|
||||
BB0:
|
||||
v_add_i32_e32 v1, vcc, s0, v1
|
||||
v_add_u32_e32 v1, vcc, s0, v1
|
||||
BB1:
|
||||
s_movk_i32 s0, 0x483
|
||||
v_cmp_ge_i32_e32 vcc, s0, v0
|
||||
|
@ -37,7 +37,7 @@ BB5:
|
|||
v_ashrrev_i32_e32 v77, 31, v76
|
||||
v_lshlrev_b64 v[10:11], 2, v[76:77]
|
||||
s_waitcnt lgkmcnt(0)
|
||||
v_add_i32_e32 v10, vcc, s8, v10
|
||||
v_add_u32_e32 v10, vcc, s8, v10
|
||||
v_mov_b32_e32 v6, s9
|
||||
v_addc_u32_e32 v11, vcc, v6, v11, vcc
|
||||
flat_load_dword v0, v[10:11]
|
||||
|
@ -53,7 +53,7 @@ BB5:
|
|||
// CHECK: s_waitcnt lgkmcnt(0) // 000000000110: BF8C007F
|
||||
// CHECK: s_add_u32 s0, s7, s0 // 000000000114: 80000007
|
||||
// CHECK: BB0:
|
||||
// CHECK: v_add_i32_e32 v1, vcc, s0, v1 // 000000000118: 32020200
|
||||
// CHECK: v_add_u32_e32 v1, vcc, s0, v1 // 000000000118: 32020200
|
||||
// CHECK: BB1:
|
||||
// CHECK: s_movk_i32 s0, 0x483 // 00000000011C: B0000483
|
||||
// CHECK: v_cmp_ge_i32_e32 vcc, s0, v0 // 000000000120: 7D8C0000
|
||||
|
@ -74,7 +74,7 @@ BB5:
|
|||
// CHECK: v_ashrrev_i32_e32 v77, 31, v76 // 000000000250: 229A989F
|
||||
// CHECK: v_lshlrev_b64 v[10:11], 2, v[76:77] // 000000000254: D28F000A 00029882
|
||||
// CHECK: s_waitcnt lgkmcnt(0) // 00000000025C: BF8C007F
|
||||
// CHECK: v_add_i32_e32 v10, vcc, s8, v10 // 000000000260: 32141408
|
||||
// CHECK: v_add_u32_e32 v10, vcc, s8, v10 // 000000000260: 32141408
|
||||
// CHECK: v_mov_b32_e32 v6, s9 // 000000000264: 7E0C0209
|
||||
// CHECK: v_addc_u32_e32 v11, vcc, v6, v11, vcc // 000000000268: 38161706
|
||||
// CHECK: flat_load_dword v0, v[10:11] // 00000000026C: DC500000 0000000A
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
; LINE: ; {{.*}}source-lines.cl:3
|
||||
; LINE: v_mov_b32_e32 v{{[0-9]+}}, 0x888
|
||||
; LINE: ; {{.*}}source-lines.cl:4
|
||||
; LINE: v_add_i32_e32
|
||||
; LINE: v_add_u32_e32
|
||||
; LINE: ; {{.*}}source-lines.cl:5
|
||||
; LINE: flat_store_dword
|
||||
; Epilogue.
|
||||
|
@ -28,7 +28,7 @@
|
|||
; SOURCE: ; int var1 = 0x888;
|
||||
; SOURCE: v_mov_b32_e32 v{{[0-9]+}}, 0x888
|
||||
; SOURCE: ; int var2 = var0 + var1;
|
||||
; SOURCE: v_add_i32_e32
|
||||
; SOURCE: v_add_u32_e32
|
||||
; SOURCE: ; *Out = var2;
|
||||
; SOURCE: flat_store_dword
|
||||
; Epilogue.
|
||||
|
|
Loading…
Reference in New Issue