2016-09-23 17:08:07 +08:00
|
|
|
//===-- VOP1Instructions.td - Vector Instruction Definitions --------------===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// VOP1 Classes
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// 32-bit VOP1 encoding. Bit layout, most significant field first:
//   [31:25] fixed 0x3f   [24:17] vdst   [16:9] opcode   [8:0] src0
class VOP1e <bits<8> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;

  let Inst{31-25} = 0x3f; // encoding
  // vdst and src0 are encoded as 0 when the profile does not provide them.
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{16-9}  = op;
  let Inst{8-0}   = !if(P.HasSrc0, src0{8-0}, 0);
}
|
|
|
|
|
2016-12-22 20:57:41 +08:00
|
|
|
// VOP1 fields layered on top of VOP_SDWAe. The low nine bits hold the
// constant 0xf9 (sdwa) instead of a src0 operand.
class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;

  let Inst{31-25} = 0x3f; // encoding
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{16-9}  = op;
  let Inst{8-0}   = 0xf9; // sdwa
}
|
|
|
|
|
2017-05-23 18:08:55 +08:00
|
|
|
// Same VOP1 field layout as VOP1_SDWAe, but derived from VOP_SDWA9Ae.
class VOP1_SDWA9Ae <bits<8> op, VOPProfile P> : VOP_SDWA9Ae <P> {
  bits<8> vdst;

  let Inst{31-25} = 0x3f; // encoding
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{16-9}  = op;
  let Inst{8-0}   = 0xf9; // sdwa
}
|
|
|
|
|
2017-03-01 05:09:04 +08:00
|
|
|
// Codegen-only pseudo for the 32-bit form of a VOP1 instruction.
//
//   opName   - assembly mnemonic.
//   P        - operand/asm profile; its 32-bit outs, ins and asm string are used.
//   pattern  - optional selection pattern (empty by default).
//   VOP1Only - when set, the SIMCInstr / MnemonicAlias names use opName as-is
//              instead of opName#"_e32".
class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1Only = 0> :
  InstSI <P.Outs32, P.Ins32, "", pattern>,
  VOP <opName>,
  SIMCInstr <!if(VOP1Only, opName, opName#"_e32"), SIEncodingFamily.NONE>,
  MnemonicAlias<!if(VOP1Only, opName, opName#"_e32"), opName> {

  // Data that VOP1_Real reads back from the pseudo.
  string Mnemonic = opName;
  string AsmOperands = P.Asm32;
  VOPProfile Pfl = P;

  // Pseudo instructions carry no encoding of their own.
  let isPseudo = 1;
  let isCodeGenOnly = 1;
  let UseNamedOperandTable = 1;
  let AsmVariantName = AMDGPUAsmVariants.Default;

  // Instruction class flags.
  let VOP1 = 1;
  let VALU = 1;
  let Uses = [EXEC];
  let SubtargetPredicate = isGCN;

  // Size and memory / side-effect properties.
  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
}
|
|
|
|
|
|
|
|
// Encodable ("real") counterpart of a VOP1_Pseudo for one encoding family.
//
//   ps             - the pseudo whose operand lists, mnemonic, asm operands
//                    and flags are reused.
//   EncodingFamily - value passed on to SIMCInstr together with
//                    ps.PseudoInstr.
class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  // (Constraints and DisableEncoding used to be assigned twice in this body;
  // each is now copied exactly once.)
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let AsmVariantName       = ps.AsmVariantName;
  let Constraints          = ps.Constraints;
  let DisableEncoding      = ps.DisableEncoding;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses                 = ps.Uses;
}
|
|
|
|
|
2016-12-22 20:57:41 +08:00
|
|
|
// SDWA pseudo for VOP1: a VOP_SDWA_Pseudo whose assembler match converter
// is "cvtSdwaVOP1".
class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]>
    : VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP1";
}
|
|
|
|
|
2016-09-23 17:08:07 +08:00
|
|
|
// Builds the selection pattern used by the 64-bit (_e64) form of a VOP1
// instruction. `ret` is chosen from the profile's modifier flags:
//   HasModifiers -> src0 matched through VOP3Mods0 (modifiers, clamp, omod)
//   HasOMod      -> src0 matched through VOP3OMods (clamp, omod)
//   otherwise    -> plain (node src0)
class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret =
    !if(P.HasModifiers,
      // Source modifiers plus clamp / omod.
      [(set P.DstVT:$vdst,
            (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
                                       i1:$clamp, i32:$omod))))],
      !if(P.HasOMod,
        // Clamp / omod only.
        [(set P.DstVT:$vdst,
              (node (P.Src0VT (VOP3OMods P.Src0VT:$src0,
                                         i1:$clamp, i32:$omod))))],
        // No modifiers at all.
        [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]
      )
    );
}
|
|
|
|
|
|
|
|
// Defines the three pseudos of one VOP1 instruction:
//   _e32  - 32-bit form (VOP1_Pseudo, no pattern)
//   _e64  - 64-bit form (VOP3_Pseudo, pattern from getVOP1Pat64<node, P>)
//   _sdwa - SDWA form (VOP1_SDWA_Pseudo)
// `node` defaults to null_frag.
multiclass VOP1Inst <string opName, VOPProfile P,
                     SDPatternOperator node = null_frag> {
  def _e32  : VOP1_Pseudo <opName, P>;
  def _e64  : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>;
  def _sdwa : VOP1_SDWA_Pseudo <opName, P>;
}
|
|
|
|
|
2017-03-27 23:57:17 +08:00
|
|
|
// Profile for instructions that accept clamp and output modifiers but no
// input modifiers: the 64-bit operand list is src0, clamp, omod.
class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
  VOPProfile<[dstVt, srcVt, untyped, untyped]> {

  let HasModifiers = 0;
  let HasClamp = 1;
  let HasOMod = 1;

  let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
  let Asm64 = "$vdst, $src0$clamp$omod";
}
|
|
|
|
|
|
|
|
// VOPProfileI2F instantiations, named VOP1_<dst type>_<src type>.
def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;
|
|
|
|
|
2016-09-23 17:08:07 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// VOP1 Instructions
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
let VOPAsmPrefer32Bit = 1 in {
  defm V_NOP : VOP1Inst <"v_nop", VOP_NONE>;
} // End VOPAsmPrefer32Bit = 1

let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
  defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>;
} // End isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1
|
|
|
|
|
|
|
|
// FIXME: Specify SchedRW for READFIRSTLANE_B32
|
|
|
|
// TODO: Make profile for this, there is VOP3 encoding also
|
|
|
|
def V_READFIRSTLANE_B32 :
  InstSI <(outs SReg_32:$vdst),
          (ins VGPR_32:$src0),
          "v_readfirstlane_b32 $vdst, $src0",
          [(set i32:$vdst, (int_amdgcn_readfirstlane i32:$src0))]>,
  Enc32 {

  // Defined directly rather than through VOP1Inst, so the encoding fields
  // and instruction flags are spelled out here.
  bits<8> vdst;
  bits<9> src0;

  // Same 32-bit VOP1 layout as VOP1e, with the opcode fixed to 0x2.
  let Inst{31-25} = 0x3f; //encoding
  let Inst{24-17} = vdst;
  let Inst{16-9}  = 0x2;
  let Inst{8-0}   = src0;

  let isCodeGenOnly = 0;
  let UseNamedOperandTable = 1;

  let VOP1 = 1;
  let VALU = 1;
  let Uses = [EXEC];
  let isConvergent = 1;
  let SubtargetPredicate = isGCN;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
}
|
|
|
|
|
|
|
|
// Conversion instructions; all share the WriteQuarterRate32 scheduling class.
let SchedRW = [WriteQuarterRate32] in {
  defm V_CVT_I32_F64     : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>;
  defm V_CVT_F64_I32     : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
  defm V_CVT_F32_I32     : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
  defm V_CVT_F32_U32     : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
  defm V_CVT_U32_F32     : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
  defm V_CVT_I32_F32     : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
  defm V_CVT_F16_F32     : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
  defm V_CVT_F32_F16     : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
  defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
  defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
  defm V_CVT_OFF_F32_I4  : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
  defm V_CVT_F32_F64     : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
  defm V_CVT_F64_F32     : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
  defm V_CVT_F32_UBYTE0  : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
  defm V_CVT_F32_UBYTE1  : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
  defm V_CVT_F32_UBYTE2  : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
  defm V_CVT_F32_UBYTE3  : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;
  defm V_CVT_U32_F64     : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;
  defm V_CVT_F64_U32     : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
} // End SchedRW = [WriteQuarterRate32]
|
|
|
|
|
|
|
|
// 32-bit rounding / exponent instructions (no explicit SchedRW override).
defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
defm V_CEIL_F32  : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>;
defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>;
defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;
defm V_EXP_F32   : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>;

let SchedRW = [WriteQuarterRate32] in {
  defm V_LOG_F32       : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>;
  defm V_RCP_F32       : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
  defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32>;
  defm V_RSQ_F32       : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
} // End SchedRW = [WriteQuarterRate32]

let SchedRW = [WriteDouble] in {
  defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>;
  defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>;
} // End SchedRW = [WriteDouble]

defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, fsqrt>;

let SchedRW = [WriteDouble] in {
  defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, fsqrt>;
} // End SchedRW = [WriteDouble]

let SchedRW = [WriteQuarterRate32] in {
  defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>;
  defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>;
} // End SchedRW = [WriteQuarterRate32]

// Integer bit operations; defined without a selection node.
defm V_NOT_B32   : VOP1Inst <"v_not_b32", VOP_I32_I32>;
defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32>;
defm V_FFBH_U32  : VOP1Inst <"v_ffbh_u32", VOP_I32_I32>;
defm V_FFBL_B32  : VOP1Inst <"v_ffbl_b32", VOP_I32_I32>;
defm V_FFBH_I32  : VOP1Inst <"v_ffbh_i32", VOP_I32_I32>;

defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64, int_amdgcn_frexp_exp>;

let SchedRW = [WriteDoubleAdd] in {
  defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>;
  defm V_FRACT_F64      : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>;
} // End SchedRW = [WriteDoubleAdd]

defm V_FREXP_EXP_I32_F32 : VOP1Inst <"v_frexp_exp_i32_f32", VOP_I32_F32, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F32    : VOP1Inst <"v_frexp_mant_f32", VOP_F32_F32, int_amdgcn_frexp_mant>;

let VOPAsmPrefer32Bit = 1 in {
  defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
} // End VOPAsmPrefer32Bit = 1
|
|
|
|
|
|
|
|
// Restrict src0 to be VGPR
|
|
|
|
def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> {
  // No extended variants for this profile.
  let HasExt = 0;
  let HasSDWA9 = 0;

  // src0 uses VRegSrc_32 in both the 32-bit and 64-bit forms.
  let Src0RC32 = VRegSrc_32;
  let Src0RC64 = VRegSrc_32;
}
|
|
|
|
|
|
|
|
// Special case because there are no true output operands. Hack vdst
|
|
|
|
// to be a src operand. The custom inserter must add a tied implicit
|
|
|
|
// def and use of the super register since there seems to be no way to
|
|
|
|
// add an implicit def of a virtual register in tablegen.
|
|
|
|
def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
  let Src0RC32 = VOPDstOperand<VGPR_32>;
  let Src0RC64 = VOPDstOperand<VGPR_32>;

  // No real output operand, but vdst must still be written into the encoding.
  let HasDst = 0;
  let EmitDst = 1; // force vdst emission
  let HasExt = 0;
  let HasSDWA9 = 0;

  // $vdst appears in the *input* lists of the 32-bit, 64-bit and SDWA forms.
  let Outs = (outs);
  let Ins32 = (ins Src0RC32:$vdst, VSrc_b32:$src0);
  let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0);
  let InsDPP = (ins DstRC:$vdst, DstRC:$old, Src0RC32:$src0,
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel);

  // Assembly strings for each form.
  let Asm32 = getAsm32<1, 1>.ret;
  let Asm64 = getAsm64<1, 1, 0, 0, 1>.ret;
  let AsmDPP = getAsmDPP<1, 1, 0>.ret;
  let AsmSDWA = getAsmSDWA<1, 1>.ret;
  let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;
}
|
|
|
|
|
2016-10-13 02:00:51 +08:00
|
|
|
let SubtargetPredicate = HasMovrel, Uses = [M0, EXEC] in {
  // v_movreld_b32 is special: its destination register is really a source.
  // It is not actually read (though it may be written) and only supplies the
  // base register that indexing starts from. Tablegen does not seem to allow
  // an implicit virtual-register output for the super register being written,
  // so an implicit def of that register has to be added to the instruction.
  defm V_MOVRELD_B32  : VOP1Inst <"v_movreld_b32", VOP_MOVRELD>;
  defm V_MOVRELS_B32  : VOP1Inst <"v_movrels_b32", VOP_I32_VI32_NO_EXT>;
  defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_NO_EXT<VOP_I32_I32>>;
} // End SubtargetPredicate = HasMovrel, Uses = [M0, EXEC]

let SchedRW = [WriteQuarterRate32] in {
  defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>;
} // End SchedRW = [WriteQuarterRate32]
|
|
|
|
|
2016-09-23 17:08:07 +08:00
|
|
|
// These instructions only exist on SI and CI
|
|
|
|
let SubtargetPredicate = isSICI in {

  // 32-bit clamp / legacy variants.
  let SchedRW = [WriteQuarterRate32] in {
    defm V_LOG_CLAMP_F32  : VOP1Inst <"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
    defm V_RCP_CLAMP_F32  : VOP1Inst <"v_rcp_clamp_f32", VOP_F32_F32>;
    defm V_RCP_LEGACY_F32 : VOP1Inst <"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
    defm V_RSQ_CLAMP_F32  : VOP1Inst <"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
    defm V_RSQ_LEGACY_F32 : VOP1Inst <"v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy>;
  } // End SchedRW = [WriteQuarterRate32]

  // 64-bit clamp variants.
  let SchedRW = [WriteDouble] in {
    defm V_RCP_CLAMP_F64 : VOP1Inst <"v_rcp_clamp_f64", VOP_F64_F64>;
    defm V_RSQ_CLAMP_F64 : VOP1Inst <"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>;
  } // End SchedRW = [WriteDouble]

} // End SubtargetPredicate = isSICI
|
|
|
|
|
|
|
|
|
|
|
|
let SubtargetPredicate = isCIVI in {

  // 64-bit rounding instructions.
  let SchedRW = [WriteDoubleAdd] in {
    defm V_TRUNC_F64 : VOP1Inst <"v_trunc_f64", VOP_F64_F64, ftrunc>;
    defm V_CEIL_F64  : VOP1Inst <"v_ceil_f64", VOP_F64_F64, fceil>;
    defm V_FLOOR_F64 : VOP1Inst <"v_floor_f64", VOP_F64_F64, ffloor>;
    defm V_RNDNE_F64 : VOP1Inst <"v_rndne_f64", VOP_F64_F64, frint>;
  } // End SchedRW = [WriteDoubleAdd]

  // Legacy log / exp; defined without a selection node.
  let SchedRW = [WriteQuarterRate32] in {
    defm V_LOG_LEGACY_F32 : VOP1Inst <"v_log_legacy_f32", VOP_F32_F32>;
    defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>;
  } // End SchedRW = [WriteQuarterRate32]

} // End SubtargetPredicate = isCIVI
|
|
|
|
|
|
|
|
|
2017-05-23 18:08:55 +08:00
|
|
|
// 16-bit VOP1 instructions, available when the subtarget has 16-bit insts.
let SubtargetPredicate = Has16BitInsts in {

  // Conversions between 16-bit integers and f16.
  defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
  defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
  defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>;
  defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>;

  // f16 arithmetic helpers.
  defm V_RCP_F16           : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
  defm V_SQRT_F16          : VOP1Inst <"v_sqrt_f16", VOP_F16_F16, fsqrt>;
  defm V_RSQ_F16           : VOP1Inst <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>;
  defm V_LOG_F16           : VOP1Inst <"v_log_f16", VOP_F16_F16, flog2>;
  defm V_EXP_F16           : VOP1Inst <"v_exp_f16", VOP_F16_F16, fexp2>;
  defm V_FREXP_MANT_F16    : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
  defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16, int_amdgcn_frexp_exp>;
  defm V_FLOOR_F16         : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>;
  defm V_CEIL_F16          : VOP1Inst <"v_ceil_f16", VOP_F16_F16, fceil>;
  defm V_TRUNC_F16         : VOP1Inst <"v_trunc_f16", VOP_F16_F16, ftrunc>;
  defm V_RNDNE_F16         : VOP1Inst <"v_rndne_f16", VOP_F16_F16, frint>;
  defm V_FRACT_F16         : VOP1Inst <"v_fract_f16", VOP_F16_F16, AMDGPUfract>;
  defm V_SIN_F16           : VOP1Inst <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
  defm V_COS_F16           : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;

} // End SubtargetPredicate = Has16BitInsts
|
|
|
|
|
2017-10-03 08:06:41 +08:00
|
|
|
// Extra selection patterns mapping i16-typed half conversions onto the
// 32-bit (_e32) convert instructions.
let OtherPredicates = [Has16BitInsts] in {

  // f16_to_fp on an i16 source -> V_CVT_F32_F16_e32.
  def : GCNPat<
    (f32 (f16_to_fp i16:$src)),
    (V_CVT_F32_F16_e32 $src)
  >;

  // AMDGPUfp_to_f16 producing i16 -> V_CVT_F16_F32_e32.
  def : GCNPat<
    (i16 (AMDGPUfp_to_f16 f32:$src)),
    (V_CVT_F16_F32_e32 $src)
  >;

} // End OtherPredicates = [Has16BitInsts]
|
|
|
|
|
2017-03-01 05:09:04 +08:00
|
|
|
// Profile with two VGPR outputs and two VGPR inputs. Only $vdst and $src0
// appear in the 32-bit asm string; the 64-bit asm string and operand list
// are left empty.
def VOP_SWAP_I32 : VOPProfile<[i32, i32, i32, untyped]> {
  // 32-bit form.
  let Outs32 = (outs VGPR_32:$vdst, VGPR_32:$vdst1);
  let Ins32 = (ins VGPR_32:$src0, VGPR_32:$src1);
  let Asm32 = " $vdst, $src0";

  // 64-bit form.
  let Outs64 = Outs32;
  let Ins64 = (ins);
  let Asm64 = "";
}
|
|
|
|
|
|
|
|
// Each output is tied to the opposite input ($vdst = $src1, $vdst1 = $src0);
// the tied operands $vdst1 and $src1 are excluded from the encoding.
let SubtargetPredicate = isGFX9,
    Constraints = "$vdst = $src1, $vdst1 = $src0",
    DisableEncoding="$vdst1,$src1",
    SchedRW = [Write64Bit, Write64Bit] in {
  // Never VOP3. Takes as long as 2 v_mov_b32s
  def V_SWAP_B32 : VOP1_Pseudo <"v_swap_b32", VOP_SWAP_I32, [], 1>;
} // End SubtargetPredicate = isGFX9
|
|
|
|
|
2016-09-23 17:08:07 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Target
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// SI
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// Real SI/CI encodings for a VOP1Inst-defined instruction NAME.
//   _e32_si : VOP1e with the low 8 bits of op.
//   _e64_si : VOP3e_si with the opcode {1, 1, op{6-0}}.
multiclass VOP1_Real_si <bits<9> op> {
  let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {
    def _e32_si
      : VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
        VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;

    def _e64_si
      : VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
        VOP3e_si <{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
}
|
|
|
|
|
|
|
|
// SI opcode assignments. 0x2 is absent from this list: V_READFIRSTLANE_B32
// writes that opcode into its encoding directly.
defm V_NOP               : VOP1_Real_si <0x0>;
defm V_MOV_B32           : VOP1_Real_si <0x1>;
defm V_CVT_I32_F64       : VOP1_Real_si <0x3>;
defm V_CVT_F64_I32       : VOP1_Real_si <0x4>;
defm V_CVT_F32_I32       : VOP1_Real_si <0x5>;
defm V_CVT_F32_U32       : VOP1_Real_si <0x6>;
defm V_CVT_U32_F32       : VOP1_Real_si <0x7>;
defm V_CVT_I32_F32       : VOP1_Real_si <0x8>;
defm V_MOV_FED_B32       : VOP1_Real_si <0x9>;
defm V_CVT_F16_F32       : VOP1_Real_si <0xa>;
defm V_CVT_F32_F16       : VOP1_Real_si <0xb>;
defm V_CVT_RPI_I32_F32   : VOP1_Real_si <0xc>;
defm V_CVT_FLR_I32_F32   : VOP1_Real_si <0xd>;
defm V_CVT_OFF_F32_I4    : VOP1_Real_si <0xe>;
defm V_CVT_F32_F64       : VOP1_Real_si <0xf>;
defm V_CVT_F64_F32       : VOP1_Real_si <0x10>;
defm V_CVT_F32_UBYTE0    : VOP1_Real_si <0x11>;
defm V_CVT_F32_UBYTE1    : VOP1_Real_si <0x12>;
defm V_CVT_F32_UBYTE2    : VOP1_Real_si <0x13>;
defm V_CVT_F32_UBYTE3    : VOP1_Real_si <0x14>;
defm V_CVT_U32_F64       : VOP1_Real_si <0x15>;
defm V_CVT_F64_U32       : VOP1_Real_si <0x16>;
defm V_FRACT_F32         : VOP1_Real_si <0x20>;
defm V_TRUNC_F32         : VOP1_Real_si <0x21>;
defm V_CEIL_F32          : VOP1_Real_si <0x22>;
defm V_RNDNE_F32         : VOP1_Real_si <0x23>;
defm V_FLOOR_F32         : VOP1_Real_si <0x24>;
defm V_EXP_F32           : VOP1_Real_si <0x25>;
defm V_LOG_CLAMP_F32     : VOP1_Real_si <0x26>;
defm V_LOG_F32           : VOP1_Real_si <0x27>;
defm V_RCP_CLAMP_F32     : VOP1_Real_si <0x28>;
defm V_RCP_LEGACY_F32    : VOP1_Real_si <0x29>;
defm V_RCP_F32           : VOP1_Real_si <0x2a>;
defm V_RCP_IFLAG_F32     : VOP1_Real_si <0x2b>;
defm V_RSQ_CLAMP_F32     : VOP1_Real_si <0x2c>;
defm V_RSQ_LEGACY_F32    : VOP1_Real_si <0x2d>;
defm V_RSQ_F32           : VOP1_Real_si <0x2e>;
defm V_RCP_F64           : VOP1_Real_si <0x2f>;
defm V_RCP_CLAMP_F64     : VOP1_Real_si <0x30>;
defm V_RSQ_F64           : VOP1_Real_si <0x31>;
defm V_RSQ_CLAMP_F64     : VOP1_Real_si <0x32>;
defm V_SQRT_F32          : VOP1_Real_si <0x33>;
defm V_SQRT_F64          : VOP1_Real_si <0x34>;
defm V_SIN_F32           : VOP1_Real_si <0x35>;
defm V_COS_F32           : VOP1_Real_si <0x36>;
defm V_NOT_B32           : VOP1_Real_si <0x37>;
defm V_BFREV_B32         : VOP1_Real_si <0x38>;
defm V_FFBH_U32          : VOP1_Real_si <0x39>;
defm V_FFBL_B32          : VOP1_Real_si <0x3a>;
defm V_FFBH_I32          : VOP1_Real_si <0x3b>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_si <0x3c>;
defm V_FREXP_MANT_F64    : VOP1_Real_si <0x3d>;
defm V_FRACT_F64         : VOP1_Real_si <0x3e>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_si <0x3f>;
defm V_FREXP_MANT_F32    : VOP1_Real_si <0x40>;
defm V_CLREXCP           : VOP1_Real_si <0x41>;
defm V_MOVRELD_B32       : VOP1_Real_si <0x42>;
defm V_MOVRELS_B32       : VOP1_Real_si <0x43>;
defm V_MOVRELSD_B32      : VOP1_Real_si <0x44>;
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// CI
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// Real encodings for CI-only instructions. The encoding classes and
// SIEncodingFamily.SI match VOP1_Real_si; only the assembler predicate
// (isCIOnly), decoder namespace ("CI") and record suffixes differ.
multiclass VOP1_Real_ci <bits<9> op> {
  let AssemblerPredicates = [isCIOnly], DecoderNamespace = "CI" in {
    def _e32_ci
      : VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
        VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;

    def _e64_ci
      : VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
        VOP3e_si <{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
}
|
|
|
|
|
|
|
|
// CI opcode assignments. Definition order is kept as written, so
// V_FLOOR_F64 (0x1a) comes before V_RNDNE_F64 (0x19).
defm V_TRUNC_F64      : VOP1_Real_ci <0x17>;
defm V_CEIL_F64       : VOP1_Real_ci <0x18>;
defm V_FLOOR_F64      : VOP1_Real_ci <0x1a>;
defm V_RNDNE_F64      : VOP1_Real_ci <0x19>;
defm V_LOG_LEGACY_F32 : VOP1_Real_ci <0x45>;
defm V_EXP_LEGACY_F32 : VOP1_Real_ci <0x46>;
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// VI
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// DPP form of a VOP1 instruction, built on VOP_DPP from an existing pseudo.
// P defaults to the pseudo's own profile. The low nine bits hold the
// constant 0xfa (dpp) instead of a src0 operand.
class VOP1_DPP <bits<8> op, VOP1_Pseudo ps, VOPProfile P = ps.Pfl> :
  VOP_DPP <ps.OpName, P> {
  bits<8> vdst;

  // Properties copied from the pseudo.
  let Defs = ps.Defs;
  let Uses = ps.Uses;
  let SchedRW = ps.SchedRW;
  let hasSideEffects = ps.hasSideEffects;

  let Inst{31-25} = 0x3f; //encoding
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{16-9}  = op;
  let Inst{8-0}   = 0xfa; // dpp
}
|
|
|
|
|
2017-03-01 05:09:04 +08:00
|
|
|
// Real VI encoding for a pseudo named NAME itself (no "_e32" suffix).
// Only the 32-bit VOP1e form is defined.
multiclass VOP1Only_Real_vi <bits<10> op> {
  let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
    def _vi
      : VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.VI>,
        VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
  }
}
|
|
|
|
|
2016-09-23 17:08:07 +08:00
|
|
|
// Real VI encodings for a VOP1Inst-defined instruction NAME.
//   _e32_vi    : VOP1e with the low 8 bits of op.
//   _e64_vi    : VOP3e_vi with opcode 0x140 + op.
//   _sdwa_vi   : SDWA encoding (VOP1_SDWAe).
//   _sdwa_gfx9 : SDWA9 encoding (VOP1_SDWA9Ae).
//   _dpp       : DPP encoding built from the _e32 pseudo.
// Only _e32_vi and _e64_vi sit inside the isVI / "VI" let-block.
multiclass VOP1_Real_vi <bits<10> op> {
  let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
    def _e32_vi
      : VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
        VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;

    def _e64_vi
      : VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
        VOP3e_vi <!add(0x140, op), !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }

  def _sdwa_vi
    : VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  def _sdwa_gfx9
    : VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  // For now left dpp only for asm/dasm
  // TODO: add corresponding pseudo
  def _dpp : VOP1_DPP<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>;
}
|
|
|
|
|
|
|
|
// VI opcode assignments. Definition order is kept as written and is not
// sorted by opcode. 0x2 is absent: V_READFIRSTLANE_B32 writes that opcode
// into its encoding directly.
defm V_NOP               : VOP1_Real_vi <0x0>;
defm V_MOV_B32           : VOP1_Real_vi <0x1>;
defm V_CVT_I32_F64       : VOP1_Real_vi <0x3>;
defm V_CVT_F64_I32       : VOP1_Real_vi <0x4>;
defm V_CVT_F32_I32       : VOP1_Real_vi <0x5>;
defm V_CVT_F32_U32       : VOP1_Real_vi <0x6>;
defm V_CVT_U32_F32       : VOP1_Real_vi <0x7>;
defm V_CVT_I32_F32       : VOP1_Real_vi <0x8>;
defm V_MOV_FED_B32       : VOP1_Real_vi <0x9>;
defm V_CVT_F16_F32       : VOP1_Real_vi <0xa>;
defm V_CVT_F32_F16       : VOP1_Real_vi <0xb>;
defm V_CVT_RPI_I32_F32   : VOP1_Real_vi <0xc>;
defm V_CVT_FLR_I32_F32   : VOP1_Real_vi <0xd>;
defm V_CVT_OFF_F32_I4    : VOP1_Real_vi <0xe>;
defm V_CVT_F32_F64       : VOP1_Real_vi <0xf>;
defm V_CVT_F64_F32       : VOP1_Real_vi <0x10>;
defm V_CVT_F32_UBYTE0    : VOP1_Real_vi <0x11>;
defm V_CVT_F32_UBYTE1    : VOP1_Real_vi <0x12>;
defm V_CVT_F32_UBYTE2    : VOP1_Real_vi <0x13>;
defm V_CVT_F32_UBYTE3    : VOP1_Real_vi <0x14>;
defm V_CVT_U32_F64       : VOP1_Real_vi <0x15>;
defm V_CVT_F64_U32       : VOP1_Real_vi <0x16>;
defm V_FRACT_F32         : VOP1_Real_vi <0x1b>;
defm V_TRUNC_F32         : VOP1_Real_vi <0x1c>;
defm V_CEIL_F32          : VOP1_Real_vi <0x1d>;
defm V_RNDNE_F32         : VOP1_Real_vi <0x1e>;
defm V_FLOOR_F32         : VOP1_Real_vi <0x1f>;
defm V_EXP_F32           : VOP1_Real_vi <0x20>;
defm V_LOG_F32           : VOP1_Real_vi <0x21>;
defm V_RCP_F32           : VOP1_Real_vi <0x22>;
defm V_RCP_IFLAG_F32     : VOP1_Real_vi <0x23>;
defm V_RSQ_F32           : VOP1_Real_vi <0x24>;
defm V_RCP_F64           : VOP1_Real_vi <0x25>;
defm V_RSQ_F64           : VOP1_Real_vi <0x26>;
defm V_SQRT_F32          : VOP1_Real_vi <0x27>;
defm V_SQRT_F64          : VOP1_Real_vi <0x28>;
defm V_SIN_F32           : VOP1_Real_vi <0x29>;
defm V_COS_F32           : VOP1_Real_vi <0x2a>;
defm V_NOT_B32           : VOP1_Real_vi <0x2b>;
defm V_BFREV_B32         : VOP1_Real_vi <0x2c>;
defm V_FFBH_U32          : VOP1_Real_vi <0x2d>;
defm V_FFBL_B32          : VOP1_Real_vi <0x2e>;
defm V_FFBH_I32          : VOP1_Real_vi <0x2f>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_vi <0x30>;
defm V_FREXP_MANT_F64    : VOP1_Real_vi <0x31>;
defm V_FRACT_F64         : VOP1_Real_vi <0x32>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_vi <0x33>;
defm V_FREXP_MANT_F32    : VOP1_Real_vi <0x34>;
defm V_CLREXCP           : VOP1_Real_vi <0x35>;
defm V_MOVRELD_B32       : VOP1_Real_vi <0x36>;
defm V_MOVRELS_B32       : VOP1_Real_vi <0x37>;
defm V_MOVRELSD_B32      : VOP1_Real_vi <0x38>;
defm V_TRUNC_F64         : VOP1_Real_vi <0x17>;
defm V_CEIL_F64          : VOP1_Real_vi <0x18>;
defm V_FLOOR_F64         : VOP1_Real_vi <0x1a>;
defm V_RNDNE_F64         : VOP1_Real_vi <0x19>;
defm V_LOG_LEGACY_F32    : VOP1_Real_vi <0x4c>;
defm V_EXP_LEGACY_F32    : VOP1_Real_vi <0x4b>;
defm V_CVT_F16_U16       : VOP1_Real_vi <0x39>;
defm V_CVT_F16_I16       : VOP1_Real_vi <0x3a>;
defm V_CVT_U16_F16       : VOP1_Real_vi <0x3b>;
defm V_CVT_I16_F16       : VOP1_Real_vi <0x3c>;
defm V_RCP_F16           : VOP1_Real_vi <0x3d>;
defm V_SQRT_F16          : VOP1_Real_vi <0x3e>;
defm V_RSQ_F16           : VOP1_Real_vi <0x3f>;
defm V_LOG_F16           : VOP1_Real_vi <0x40>;
defm V_EXP_F16           : VOP1_Real_vi <0x41>;
defm V_FREXP_MANT_F16    : VOP1_Real_vi <0x42>;
defm V_FREXP_EXP_I16_F16 : VOP1_Real_vi <0x43>;
defm V_FLOOR_F16         : VOP1_Real_vi <0x44>;
defm V_CEIL_F16          : VOP1_Real_vi <0x45>;
defm V_TRUNC_F16         : VOP1_Real_vi <0x46>;
defm V_RNDNE_F16         : VOP1_Real_vi <0x47>;
defm V_FRACT_F16         : VOP1_Real_vi <0x48>;
defm V_SIN_F16           : VOP1_Real_vi <0x49>;
defm V_COS_F16           : VOP1_Real_vi <0x4a>;
// V_SWAP_B32 has only the VOP1 form, hence VOP1Only_Real_vi.
defm V_SWAP_B32          : VOP1Only_Real_vi <0x51>;
|
2016-10-13 02:49:05 +08:00
|
|
|
|
|
|
|
// Copy of v_mov_b32 with $vdst as a use operand for use with VGPR
|
|
|
|
// indexing mode. vdst can't be treated as a def for codegen purposes,
|
|
|
|
// and an implicit use and def of the super register should be added.
|
|
|
|
def V_MOV_B32_indirect
  : VPseudoInstSI<(outs),
                  (ins getVALUDstForVT<i32>.ret:$vdst,
                       getVOPSrc0ForVT<i32>.ret:$src0)>,
    // Expands to V_MOV_B32_e32_vi with the same two operands.
    PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
                                          getVOPSrc0ForVT<i32>.ret:$src0)> {
  let SubtargetPredicate = isVI;
  let VOP1 = 1;
}
|
|
|
|
|
AMDGPU: Fix Two Address problems with v_movreld
Summary:
The v_movreld machine instruction is used with three operands that are
in a sense tied to each other (the explicit VGPR_32 def and the implicit
VGPR_NN def and use). There is no way to express that using the currently
available operand bits, and indeed there are cases where the Two Address
instructions pass does the wrong thing.
This patch introduces a new set of pseudo instructions that are identical
in intended semantics to v_movreld, but they only have two tied operands.
Having to add a new set of pseudo instructions is admittedly annoying, but
it's a fairly straightforward and solid approach. The only alternative I
see is to try to teach the Two Address instructions pass about Three Address
instructions, and I'm afraid that's trickier and is going to end up more
fragile.
Note that v_movrels does not suffer from this problem, and so this patch
does not touch it.
This fixes several GL45-CTS.shaders.indexing.* tests.
Reviewers: tstellarAMD, arsenm
Subscribers: kzhuravl, wdng, yaxunl, llvm-commits, tony-tye
Differential Revision: https://reviews.llvm.org/D25633
llvm-svn: 284980
2016-10-24 22:56:02 +08:00
|
|
|
// Pseudo form of the v_movreld_b32 instruction in which the vector operand
// shows up exactly twice: once as the def ($vdst) and once as the use ($vsrc).
// Selecting this pseudo instead of the machine instruction avoids problems
// with the Two Address instructions pass.
//
//   rc      - register class of the vector being written into.
//   $val    - the 32-bit value to move.
//   $offset - immediate offset operand.
class V_MOVRELD_B32_pseudo<RegisterClass rc>
    : VPseudoInstSI<(outs rc:$vdst),
                    (ins rc:$vsrc, VSrc_b32:$val, i32imm:$offset)> {
  let SubtargetPredicate = HasMovrel;
  let VOP1 = 1;

  // The instruction reads M0 and EXEC implicitly.
  let Uses = [M0, EXEC];

  // Tie the input vector to the result so both share one register.
  let Constraints = "$vsrc = $vdst";
}
|
|
|
|
|
|
|
|
// One V_MOVRELD_B32 pseudo per vector register class, from a single 32-bit
// VGPR (V1) up to a 512-bit register tuple (V16).
def V_MOVRELD_B32_V1  : V_MOVRELD_B32_pseudo<VGPR_32>;
def V_MOVRELD_B32_V2  : V_MOVRELD_B32_pseudo<VReg_64>;
def V_MOVRELD_B32_V4  : V_MOVRELD_B32_pseudo<VReg_128>;
def V_MOVRELD_B32_V8  : V_MOVRELD_B32_pseudo<VReg_256>;
def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo<VReg_512>;
|
|
|
|
|
2017-10-03 08:06:41 +08:00
|
|
|
// Selection patterns that only apply on VI. The patterns are kept in their
// original relative order.
let OtherPredicates = [isVI] in {

// llvm.amdgcn.mov.dpp -> V_MOV_B32_dpp.
// $src is passed twice: the first operand is the "old" value that is tied to
// the destination and supplies the result for lanes the DPP controls leave
// unwritten; the second is the actual source. Using $src for both makes
// unwritten lanes keep the source value.
def : GCNPat<
  (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask,
                           imm:$bank_mask, imm:$bound_ctrl)),
  (V_MOV_B32_dpp $src, $src,
                 (as_i32imm $dpp_ctrl),
                 (as_i32imm $row_mask),
                 (as_i32imm $bank_mask),
                 (as_i1imm $bound_ctrl))
>;

// llvm.amdgcn.update.dpp -> V_MOV_B32_dpp.
// Same instruction as above, but the caller provides the "old" operand
// explicitly, so unwritten lanes take their value from $old.
def : GCNPat<
  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl,
                              imm:$row_mask, imm:$bank_mask,
                              imm:$bound_ctrl)),
  (V_MOV_B32_dpp $old, $src,
                 (as_i32imm $dpp_ctrl),
                 (as_i32imm $row_mask),
                 (as_i32imm $bank_mask),
                 (as_i1imm $bound_ctrl))
>;

// anyext i16 -> i32 is selected as a plain COPY of the source.
def : GCNPat<
  (i32 (anyext i16:$src)),
  (COPY $src)
>;

// anyext i16 -> i64: build a 64-bit VGPR pair whose low half (sub0) is a copy
// of the source and whose high half (sub1) is materialized as zero.
def : GCNPat<
  (i64 (anyext i16:$src)),
  (REG_SEQUENCE VReg_64,
                (i32 (COPY $src)), sub0,
                (V_MOV_B32_e32 (i32 0)), sub1)
>;

// trunc i32 -> i16 is selected as a plain COPY of the source.
def : GCNPat<
  (i16 (trunc i32:$src)),
  (COPY $src)
>;

// trunc i64 -> i16 takes the low 32-bit subregister (sub0) of the source.
def : GCNPat<
  (i16 (trunc i64:$src)),
  (EXTRACT_SUBREG $src, sub0)
>;

} // End OtherPredicates = [isVI]
|