forked from OSchip/llvm-project
R600/SI: Do abs/neg folding with ComplexPatterns
Abs/neg folding has moved out of foldOperands and into the instruction selection phase using complex patterns. As a consequence of this change, we now prefer to select the 64-bit encoding for most instructions and the modifier operands have been dropped from integer VOP3 instructions. llvm-svn: 214467
This commit is contained in:
parent
6655dd699f
commit
b4a313a76f
|
@ -96,6 +96,9 @@ private:
|
|||
SDValue &SOffset, SDValue &Offset, SDValue &Offen,
|
||||
SDValue &Idxen, SDValue &GLC, SDValue &SLC,
|
||||
SDValue &TFE) const;
|
||||
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
|
||||
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
|
||||
SDValue &Clamp, SDValue &Omod) const;
|
||||
|
||||
SDNode *SelectADD_SUB_I64(SDNode *N);
|
||||
SDNode *SelectDIV_SCALE(SDNode *N);
|
||||
|
@ -879,6 +882,38 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr32(SDValue Addr, SDValue &SRsrc,
|
|||
return SelectMUBUFScratch(Addr, SRsrc, VAddr, SOffset, Offset);
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
|
||||
SDValue &SrcMods) const {
|
||||
|
||||
unsigned Mods = 0;
|
||||
|
||||
Src = In;
|
||||
|
||||
if (Src.getOpcode() == ISD::FNEG) {
|
||||
Mods |= SISrcMods::NEG;
|
||||
Src = Src.getOperand(0);
|
||||
}
|
||||
|
||||
if (Src.getOpcode() == ISD::FABS) {
|
||||
Mods |= SISrcMods::ABS;
|
||||
Src = Src.getOperand(0);
|
||||
}
|
||||
|
||||
SrcMods = CurDAG->getTargetConstant(Mods, MVT::i32);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
|
||||
SDValue &SrcMods, SDValue &Clamp,
|
||||
SDValue &Omod) const {
|
||||
// FIXME: Handle Clamp and Omod
|
||||
Clamp = CurDAG->getTargetConstant(0, MVT::i32);
|
||||
Omod = CurDAG->getTargetConstant(0, MVT::i32);
|
||||
|
||||
return SelectVOP3Mods(In, Src, SrcMods);
|
||||
}
|
||||
|
||||
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
|
||||
const AMDGPUTargetLowering& Lowering =
|
||||
*static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
|
||||
|
|
|
@ -323,6 +323,14 @@ def atomic_cmp_swap_64_local :
|
|||
AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
|
||||
}]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Misc Pattern Fragments
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def fmad : PatFrag <
|
||||
(ops node:$src0, node:$src1, node:$src2),
|
||||
(fadd (fmul node:$src0, node:$src1), node:$src2)
|
||||
>;
|
||||
|
||||
class Constants {
|
||||
int TWO_PI = 0x40c90fdb;
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "SIDefines.h"
|
||||
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
||||
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
|
||||
#include "MCTargetDesc/AMDGPUFixupKinds.h"
|
||||
|
@ -84,6 +85,15 @@ MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
|
|||
|
||||
bool SIMCCodeEmitter::isSrcOperand(const MCInstrDesc &Desc,
|
||||
unsigned OpNo) const {
|
||||
// FIXME: We need a better way to figure out which operands can be immediate
|
||||
// values
|
||||
//
|
||||
// Some VOP* instructions like ADDC use VReg32 as the register class
|
||||
// for source 0, because they read VCC and can't take an SGPR as an
|
||||
// argument due to constant bus restrictions.
|
||||
if (OpNo == 1 && (Desc.TSFlags & (SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
|
||||
SIInstrFlags::VOPC)))
|
||||
return true;
|
||||
|
||||
unsigned RegClass = Desc.OpInfo[OpNo].RegClass;
|
||||
return (AMDGPU::SSrc_32RegClassID == RegClass) ||
|
||||
|
|
|
@ -566,8 +566,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
|
|||
.addReg(MI->getOperand(1).getReg())
|
||||
.addImm(1) // SRC1 modifiers
|
||||
.addReg(MI->getOperand(2).getReg())
|
||||
.addImm(0) // SRC2 modifiers
|
||||
.addImm(0) // src2
|
||||
.addImm(0) // CLAMP
|
||||
.addImm(0); // OMOD
|
||||
MI->eraseFromParent();
|
||||
|
@ -1636,40 +1634,24 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
|
|||
continue;
|
||||
if (!Operand.isMachineOpcode())
|
||||
continue;
|
||||
if (Operand.getMachineOpcode() == AMDGPU::FNEG_SI) {
|
||||
Ops.pop_back();
|
||||
Ops.push_back(Operand.getOperand(0));
|
||||
InputModifiers[i] = 1;
|
||||
Promote2e64 = true;
|
||||
if (!DescE64)
|
||||
continue;
|
||||
Desc = DescE64;
|
||||
DescE64 = nullptr;
|
||||
}
|
||||
else if (Operand.getMachineOpcode() == AMDGPU::FABS_SI) {
|
||||
Ops.pop_back();
|
||||
Ops.push_back(Operand.getOperand(0));
|
||||
InputModifiers[i] = 2;
|
||||
Promote2e64 = true;
|
||||
if (!DescE64)
|
||||
continue;
|
||||
Desc = DescE64;
|
||||
DescE64 = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
if (Promote2e64) {
|
||||
std::vector<SDValue> OldOps(Ops);
|
||||
Ops.clear();
|
||||
bool HasModifiers = TII->hasModifiers(Desc->Opcode);
|
||||
for (unsigned i = 0; i < OldOps.size(); ++i) {
|
||||
// src_modifier
|
||||
if (HasModifiers)
|
||||
Ops.push_back(DAG.getTargetConstant(InputModifiers[i], MVT::i32));
|
||||
Ops.push_back(OldOps[i]);
|
||||
}
|
||||
// Add the modifier flags while promoting
|
||||
if (HasModifiers) {
|
||||
for (unsigned i = 0; i < 2; ++i)
|
||||
Ops.push_back(DAG.getTargetConstant(0, MVT::i32));
|
||||
}
|
||||
}
|
||||
|
||||
// Add optional chain and glue
|
||||
for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i)
|
||||
|
|
|
@ -61,9 +61,16 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
|
|||
let mayStore = 0;
|
||||
let hasSideEffects = 0;
|
||||
let UseNamedOperandTable = 1;
|
||||
// Using complex patterns gives VOP3 patterns a very high complexity rating,
|
||||
// but standalone patterns are almost always preferred, so we need to adjust the
|
||||
// priority lower. The goal is to use a high number to reduce complexity to
|
||||
// zero (or less than zero).
|
||||
let AddedComplexity = -1000;
|
||||
|
||||
let VOP3 = 1;
|
||||
|
||||
int Size = 8;
|
||||
let Uses = [EXEC];
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -488,12 +488,19 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
// XXX: Commute VOP3 instructions with abs and neg set.
|
||||
if (isVOP3(MI->getOpcode()) &&
|
||||
(MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
|
||||
AMDGPU::OpName::abs)).getImm() ||
|
||||
MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
|
||||
AMDGPU::OpName::neg)).getImm()))
|
||||
// XXX: Commute VOP3 instructions with abs and neg set .
|
||||
const MachineOperand *Abs = getNamedOperand(*MI, AMDGPU::OpName::abs);
|
||||
const MachineOperand *Neg = getNamedOperand(*MI, AMDGPU::OpName::neg);
|
||||
const MachineOperand *Src0Mods = getNamedOperand(*MI,
|
||||
AMDGPU::OpName::src0_modifiers);
|
||||
const MachineOperand *Src1Mods = getNamedOperand(*MI,
|
||||
AMDGPU::OpName::src1_modifiers);
|
||||
const MachineOperand *Src2Mods = getNamedOperand(*MI,
|
||||
AMDGPU::OpName::src2_modifiers);
|
||||
|
||||
if ((Abs && Abs->getImm()) || (Neg && Neg->getImm()) ||
|
||||
(Src0Mods && Src0Mods->getImm()) || (Src1Mods && Src1Mods->getImm()) ||
|
||||
(Src2Mods && Src2Mods->getImm()))
|
||||
return nullptr;
|
||||
|
||||
unsigned Reg = MI->getOperand(1).getReg();
|
||||
|
@ -672,6 +679,14 @@ bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
|
|||
return AMDGPU::getVOPe32(Opcode) != -1;
|
||||
}
|
||||
|
||||
bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
|
||||
// The src0_modifier operand is present on all instructions
|
||||
// that have modifiers.
|
||||
|
||||
return AMDGPU::getNamedOperandIdx(Opcode,
|
||||
AMDGPU::OpName::src0_modifiers) != -1;
|
||||
}
|
||||
|
||||
bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
|
||||
StringRef &ErrInfo) const {
|
||||
uint16_t Opcode = MI->getOpcode();
|
||||
|
@ -688,16 +703,22 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
|
|||
}
|
||||
|
||||
// Make sure the register classes are correct
|
||||
for (unsigned i = 0, e = Desc.getNumOperands(); i != e; ++i) {
|
||||
for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
|
||||
switch (Desc.OpInfo[i].OperandType) {
|
||||
case MCOI::OPERAND_REGISTER: {
|
||||
int RegClass = Desc.OpInfo[i].RegClass;
|
||||
if (!RI.regClassCanUseImmediate(RegClass) &&
|
||||
(MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm())) {
|
||||
// Handle some special cases:
|
||||
// Src0 of VOP1, VOP2, VOPC can be an immediate no matter what
|
||||
// the register class.
|
||||
if (i != Src0Idx || (!isVOP1(Opcode) && !isVOP2(Opcode) &&
|
||||
!isVOPC(Opcode))) {
|
||||
ErrInfo = "Expected register, but got immediate";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case MCOI::OPERAND_IMMEDIATE:
|
||||
// Check if this operand is an immediate.
|
||||
|
@ -1423,17 +1444,9 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
|
|||
// We are converting these to a BFE, so we need to add the missing
|
||||
// operands for the size and offset.
|
||||
unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
|
||||
Inst->addOperand(Inst->getOperand(1));
|
||||
Inst->getOperand(1).ChangeToImmediate(0);
|
||||
Inst->addOperand(MachineOperand::CreateImm(0));
|
||||
Inst->addOperand(MachineOperand::CreateImm(0));
|
||||
Inst->addOperand(MachineOperand::CreateImm(0));
|
||||
Inst->addOperand(MachineOperand::CreateImm(Size));
|
||||
|
||||
// XXX - Other pointless operands. There are 4, but it seems you only need
|
||||
// 3 to not hit an assertion later in MCInstLower.
|
||||
Inst->addOperand(MachineOperand::CreateImm(0));
|
||||
Inst->addOperand(MachineOperand::CreateImm(0));
|
||||
} else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
|
||||
// The VALU version adds the second operand to the result, so insert an
|
||||
// extra 0 operand.
|
||||
|
@ -1452,16 +1465,9 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
|
|||
|
||||
uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
|
||||
uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
|
||||
|
||||
Inst->RemoveOperand(2); // Remove old immediate.
|
||||
Inst->addOperand(Inst->getOperand(1));
|
||||
Inst->getOperand(1).ChangeToImmediate(0);
|
||||
Inst->addOperand(MachineOperand::CreateImm(0));
|
||||
Inst->addOperand(MachineOperand::CreateImm(Offset));
|
||||
Inst->addOperand(MachineOperand::CreateImm(0));
|
||||
Inst->addOperand(MachineOperand::CreateImm(BitWidth));
|
||||
Inst->addOperand(MachineOperand::CreateImm(0));
|
||||
Inst->addOperand(MachineOperand::CreateImm(0));
|
||||
}
|
||||
|
||||
// Update the destination register class.
|
||||
|
|
|
@ -119,6 +119,9 @@ public:
|
|||
/// This function will return false if you pass it a 32-bit instruction.
|
||||
bool hasVALU32BitEncoding(unsigned Opcode) const;
|
||||
|
||||
/// \brief Return true if this instruction has any modifiers.
|
||||
/// e.g. src[012]_mod, omod, clamp.
|
||||
bool hasModifiers(unsigned Opcode) const;
|
||||
bool verifyInstruction(const MachineInstr *MI,
|
||||
StringRef &ErrInfo) const override;
|
||||
|
||||
|
@ -219,4 +222,11 @@ namespace SIInstrFlags {
|
|||
};
|
||||
}
|
||||
|
||||
namespace SISrcMods {
|
||||
enum {
|
||||
NEG = 1 << 0,
|
||||
ABS = 1 << 1
|
||||
};
|
||||
}
|
||||
|
||||
#endif //SIINSTRINFO_H
|
||||
|
|
|
@ -159,6 +159,8 @@ def sopp_brtarget : Operand<OtherVT> {
|
|||
let OperandType = "OPERAND_PCREL";
|
||||
}
|
||||
|
||||
include "SIInstrFormats.td"
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Complex patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -167,6 +169,9 @@ def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
|
|||
def MUBUFAddr64 : ComplexPattern<i64, 3, "SelectMUBUFAddr64">;
|
||||
def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
|
||||
|
||||
def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
|
||||
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SI assembler operands
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -176,7 +181,17 @@ def SIOperand {
|
|||
int VCC = 0x6A;
|
||||
}
|
||||
|
||||
include "SIInstrFormats.td"
|
||||
def SRCMODS {
|
||||
int NONE = 0;
|
||||
}
|
||||
|
||||
def DSTCLAMP {
|
||||
int NONE = 0;
|
||||
}
|
||||
|
||||
def DSTOMOD {
|
||||
int NONE = 0;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
@ -270,6 +285,195 @@ multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass baseClass,
|
|||
// Vector ALU classes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// This must always be right before the operand being input modified.
|
||||
def InputMods : OperandWithDefaultOps <i32, (ops (i32 0))> {
|
||||
let PrintMethod = "printOperandAndMods";
|
||||
}
|
||||
def InputModsNoDefault : Operand <i32> {
|
||||
let PrintMethod = "printOperandAndMods";
|
||||
}
|
||||
|
||||
class getNumSrcArgs<ValueType Src1, ValueType Src2> {
|
||||
int ret =
|
||||
!if (!eq(Src1.Value, untyped.Value), 1, // VOP1
|
||||
!if (!eq(Src2.Value, untyped.Value), 2, // VOP2
|
||||
3)); // VOP3
|
||||
}
|
||||
|
||||
// Returns the register class to use for the destination of VOP[123C]
|
||||
// instructions for the given VT.
|
||||
class getVALUDstForVT<ValueType VT> {
|
||||
RegisterClass ret = !if(!eq(VT.Size, 32), VReg_32, VReg_64);
|
||||
}
|
||||
|
||||
// Returns the register class to use for source 0 of VOP[12C]
|
||||
// instructions for the given VT.
|
||||
class getVOPSrc0ForVT<ValueType VT> {
|
||||
RegisterClass ret = !if(!eq(VT.Size, 32), VSrc_32, VSrc_64);
|
||||
}
|
||||
|
||||
// Returns the register class to use for source 1 of VOP[12C] for the
|
||||
// given VT.
|
||||
class getVOPSrc1ForVT<ValueType VT> {
|
||||
RegisterClass ret = !if(!eq(VT.Size, 32), VReg_32, VReg_64);
|
||||
}
|
||||
|
||||
// Returns the register classes for the source arguments of a VOP[12C]
|
||||
// instruction for the given SrcVTs.
|
||||
class getInRC32 <list<ValueType> SrcVT> {
|
||||
list<RegisterClass> ret = [
|
||||
getVOPSrc0ForVT<SrcVT[0]>.ret,
|
||||
getVOPSrc1ForVT<SrcVT[1]>.ret
|
||||
];
|
||||
}
|
||||
|
||||
// Returns the register class to use for sources of VOP3 instructions for the
|
||||
// given VT.
|
||||
class getVOP3SrcForVT<ValueType VT> {
|
||||
RegisterClass ret = !if(!eq(VT.Size, 32), VSrc_32, VSrc_64);
|
||||
}
|
||||
|
||||
// Returns the register classes for the source arguments of a VOP3
|
||||
// instruction for the given SrcVTs.
|
||||
class getInRC64 <list<ValueType> SrcVT> {
|
||||
list<RegisterClass> ret = [
|
||||
getVOP3SrcForVT<SrcVT[0]>.ret,
|
||||
getVOP3SrcForVT<SrcVT[1]>.ret,
|
||||
getVOP3SrcForVT<SrcVT[2]>.ret
|
||||
];
|
||||
}
|
||||
|
||||
// Returns 1 if the source arguments have modifiers, 0 if they do not.
|
||||
class hasModifiers<ValueType SrcVT> {
|
||||
bit ret = !if(!eq(SrcVT.Value, f32.Value), 1,
|
||||
!if(!eq(SrcVT.Value, f64.Value), 1, 0));
|
||||
}
|
||||
|
||||
// Returns the input arguments for VOP[12C] instructions for the given SrcVT.
|
||||
class getIns32 <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
|
||||
dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0), // VOP1
|
||||
!if(!eq(NumSrcArgs, 2), (ins Src0RC:$src0, Src1RC:$src1), // VOP2
|
||||
(ins)));
|
||||
}
|
||||
|
||||
// Returns the input arguments for VOP3 instructions for the given SrcVT.
|
||||
class getIns64 <RegisterClass Src0RC, RegisterClass Src1RC,
|
||||
RegisterClass Src2RC, int NumSrcArgs,
|
||||
bit HasModifiers> {
|
||||
|
||||
dag ret =
|
||||
!if (!eq(NumSrcArgs, 1),
|
||||
!if (!eq(HasModifiers, 1),
|
||||
// VOP1 with modifiers
|
||||
(ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
|
||||
i32imm:$clamp, i32imm:$omod)
|
||||
/* else */,
|
||||
// VOP1 without modifiers
|
||||
(ins Src0RC:$src0)
|
||||
/* endif */ ),
|
||||
!if (!eq(NumSrcArgs, 2),
|
||||
!if (!eq(HasModifiers, 1),
|
||||
// VOP 2 with modifiers
|
||||
(ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
|
||||
InputModsNoDefault:$src1_modifiers, Src1RC:$src1,
|
||||
i32imm:$clamp, i32imm:$omod)
|
||||
/* else */,
|
||||
// VOP2 without modifiers
|
||||
(ins Src0RC:$src0, Src1RC:$src1)
|
||||
/* endif */ )
|
||||
/* NumSrcArgs == 3 */,
|
||||
!if (!eq(HasModifiers, 1),
|
||||
// VOP3 with modifiers
|
||||
(ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
|
||||
InputModsNoDefault:$src1_modifiers, Src1RC:$src1,
|
||||
InputModsNoDefault:$src2_modifiers, Src2RC:$src2,
|
||||
i32imm:$clamp, i32imm:$omod)
|
||||
/* else */,
|
||||
// VOP3 without modifiers
|
||||
(ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2)
|
||||
/* endif */ )));
|
||||
}
|
||||
|
||||
// Returns the assembly string for the inputs and outputs of a VOP[12C]
|
||||
// instruction. This does not add the _e32 suffix, so it can be reused
|
||||
// by getAsm64.
|
||||
class getAsm32 <int NumSrcArgs> {
|
||||
string src1 = ", $src1";
|
||||
string src2 = ", $src2";
|
||||
string ret = " $dst, $src0"#
|
||||
!if(!eq(NumSrcArgs, 1), "", src1)#
|
||||
!if(!eq(NumSrcArgs, 3), src2, "");
|
||||
}
|
||||
|
||||
// Returns the assembly string for the inputs and outputs of a VOP3
|
||||
// instruction.
|
||||
class getAsm64 <int NumSrcArgs, bit HasModifiers> {
|
||||
string src0 = "$src0_modifiers,";
|
||||
string src1 = !if(!eq(NumSrcArgs, 1), "", " $src1_modifiers,");
|
||||
string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers,", "");
|
||||
string ret =
|
||||
!if(!eq(HasModifiers, 0),
|
||||
getAsm32<NumSrcArgs>.ret,
|
||||
" $dst, "#src0#src1#src2#" $clamp, $omod");
|
||||
}
|
||||
|
||||
|
||||
class VOPProfile <list<ValueType> _ArgVT> {
|
||||
|
||||
field list<ValueType> ArgVT = _ArgVT;
|
||||
|
||||
field ValueType DstVT = ArgVT[0];
|
||||
field ValueType Src0VT = ArgVT[1];
|
||||
field ValueType Src1VT = ArgVT[2];
|
||||
field ValueType Src2VT = ArgVT[3];
|
||||
field RegisterClass DstRC = getVALUDstForVT<DstVT>.ret;
|
||||
field RegisterClass Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
|
||||
field RegisterClass Src1RC32 = getVOPSrc1ForVT<Src1VT>.ret;
|
||||
field RegisterClass Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
|
||||
field RegisterClass Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
|
||||
field RegisterClass Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
|
||||
|
||||
field int NumSrcArgs = getNumSrcArgs<Src1VT, Src2VT>.ret;
|
||||
field bit HasModifiers = hasModifiers<Src0VT>.ret;
|
||||
|
||||
field dag Outs = (outs DstRC:$dst);
|
||||
|
||||
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
|
||||
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
|
||||
HasModifiers>.ret;
|
||||
|
||||
field string Asm32 = "_e32 "#getAsm32<NumSrcArgs>.ret;
|
||||
field string Asm64 = getAsm64<NumSrcArgs, HasModifiers>.ret;
|
||||
}
|
||||
|
||||
def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
|
||||
def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
|
||||
def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
|
||||
def VOP_F64_F32 : VOPProfile <[f64, f32, untyped, untyped]>;
|
||||
def VOP_F64_F64 : VOPProfile <[f64, f64, untyped, untyped]>;
|
||||
def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>;
|
||||
def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
|
||||
def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
|
||||
def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
|
||||
|
||||
def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
|
||||
def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
|
||||
def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
|
||||
def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
|
||||
def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
|
||||
def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
|
||||
def VOP_I32_I32_I32_VCC : VOPProfile <[i32, i32, i32, untyped]> {
|
||||
let Src0RC32 = VReg_32;
|
||||
}
|
||||
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
|
||||
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
|
||||
|
||||
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
|
||||
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
|
||||
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
|
||||
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
|
||||
|
||||
|
||||
class VOP <string opName> {
|
||||
string OpName = opName;
|
||||
}
|
||||
|
@ -284,6 +488,17 @@ class SIMCInstr <string pseudo, int subtarget> {
|
|||
int Subtarget = subtarget;
|
||||
}
|
||||
|
||||
class VOP3DisableFields <bit HasSrc1, bit HasSrc2, bit HasModifiers> {
|
||||
|
||||
bits<2> src0_modifiers = !if(HasModifiers, ?, 0);
|
||||
bits<2> src1_modifiers = !if(HasModifiers, !if(HasSrc1, ?, 0), 0);
|
||||
bits<2> src2_modifiers = !if(HasModifiers, !if(HasSrc2, ? ,0) ,0);
|
||||
bits<2> omod = !if(HasModifiers, ?, 0);
|
||||
bits<1> clamp = !if(HasModifiers, ?, 0);
|
||||
bits<9> src1 = !if(HasSrc1, ?, 0);
|
||||
bits<9> src2 = !if(HasSrc2, ?, 0);
|
||||
}
|
||||
|
||||
class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
|
||||
VOP3Common <outs, ins, "", pattern>,
|
||||
VOP <opName>,
|
||||
|
@ -296,212 +511,259 @@ class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
|
|||
SIMCInstr<opName, SISubtarget.SI>;
|
||||
|
||||
multiclass VOP3_m <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern,
|
||||
string opName> {
|
||||
string opName, int NumSrcArgs, bit HasMods = 1> {
|
||||
|
||||
def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
|
||||
|
||||
def _si : VOP3_Real_si <op, outs, ins, asm, opName>;
|
||||
def _si : VOP3_Real_si <op, outs, ins, asm, opName>,
|
||||
VOP3DisableFields<!if(!eq(NumSrcArgs, 1), 0, 1),
|
||||
!if(!eq(NumSrcArgs, 2), 0, 1),
|
||||
HasMods>;
|
||||
|
||||
}
|
||||
|
||||
multiclass VOP3_1_m <bits<8> op, dag outs, dag ins, string asm,
|
||||
list<dag> pattern, string opName> {
|
||||
list<dag> pattern, string opName, bit HasMods = 1> {
|
||||
|
||||
def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
|
||||
|
||||
let src1 = 0, src1_modifiers = 0, src2 = 0, src2_modifiers = 0 in {
|
||||
|
||||
def _si : VOP3_Real_si <
|
||||
{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
|
||||
outs, ins, asm, opName
|
||||
>;
|
||||
|
||||
} // src1 = 0, src1_modifiers = 0, src2 = 0, src2_modifiers = 0
|
||||
outs, ins, asm, opName>,
|
||||
VOP3DisableFields<0, 0, HasMods>;
|
||||
}
|
||||
|
||||
multiclass VOP3_2_m <bits<6> op, dag outs, dag ins, string asm,
|
||||
list<dag> pattern, string opName, string revOp> {
|
||||
multiclass VOP3_2_m <bits<9> op, dag outs, dag ins, string asm,
|
||||
list<dag> pattern, string opName, string revOp,
|
||||
bit HasMods = 1, bit UseFullOp = 0> {
|
||||
|
||||
def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
|
||||
VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
|
||||
|
||||
def _si : VOP3_Real_si <op,
|
||||
outs, ins, asm, opName>,
|
||||
VOP2_REV<revOp#"_e64_si", !eq(revOp, opName)>,
|
||||
VOP3DisableFields<1, 0, HasMods>;
|
||||
}
|
||||
|
||||
multiclass VOP3b_2_m <bits<9> op, dag outs, dag ins, string asm,
|
||||
list<dag> pattern, string opName, string revOp,
|
||||
bit HasMods = 1, bit UseFullOp = 0> {
|
||||
def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
|
||||
VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
|
||||
|
||||
// The VOP2 variant puts the carry out into VCC, the VOP3 variant
|
||||
// can write it into any SGPR. We currently don't use the carry out,
|
||||
// so for now hardcode it to VCC as well.
|
||||
let sdst = SIOperand.VCC, Defs = [VCC] in {
|
||||
def _si : VOP3b <op, outs, ins, asm, pattern>,
|
||||
VOP3DisableFields<1, 0, HasMods>,
|
||||
SIMCInstr<opName, SISubtarget.SI>,
|
||||
VOP2_REV<revOp#"_e64_si", !eq(revOp, opName)>;
|
||||
} // End sdst = SIOperand.VCC, Defs = [VCC]
|
||||
}
|
||||
|
||||
multiclass VOP3_C_m <bits<8> op, dag outs, dag ins, string asm,
|
||||
list<dag> pattern, string opName,
|
||||
bit HasMods, bit defExec> {
|
||||
|
||||
def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
|
||||
|
||||
let src2 = 0, src2_modifiers = 0 in {
|
||||
|
||||
def _si : VOP3_Real_si <
|
||||
{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
|
||||
outs, ins, asm, opName>,
|
||||
VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
|
||||
|
||||
} // src2 = 0, src2_modifiers = 0
|
||||
}
|
||||
|
||||
// This must always be right before the operand being input modified.
|
||||
def InputMods : OperandWithDefaultOps <i32, (ops (i32 0))> {
|
||||
let PrintMethod = "printOperandAndMods";
|
||||
}
|
||||
|
||||
multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src,
|
||||
string opName, list<dag> pattern> {
|
||||
|
||||
def _e32 : VOP1 <
|
||||
op, (outs drc:$dst), (ins src:$src0),
|
||||
opName#"_e32 $dst, $src0", pattern
|
||||
>, VOP <opName>;
|
||||
|
||||
defm _e64 : VOP3_1_m <
|
||||
op,
|
||||
(outs drc:$dst),
|
||||
(ins InputMods:$src0_modifiers, src:$src0, i32imm:$clamp, i32imm:$omod),
|
||||
opName#"_e64 $dst, $src0_modifiers, $clamp, $omod", [], opName>;
|
||||
}
|
||||
|
||||
multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern>
|
||||
: VOP1_Helper <op, VReg_32, VSrc_32, opName, pattern>;
|
||||
|
||||
multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern>
|
||||
: VOP1_Helper <op, VReg_64, VSrc_64, opName, pattern>;
|
||||
|
||||
multiclass VOP1_32_64 <bits<8> op, string opName, list<dag> pattern>
|
||||
: VOP1_Helper <op, VReg_32, VSrc_64, opName, pattern>;
|
||||
|
||||
multiclass VOP1_64_32 <bits<8> op, string opName, list<dag> pattern>
|
||||
: VOP1_Helper <op, VReg_64, VSrc_32, opName, pattern>;
|
||||
|
||||
multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
|
||||
string opName, list<dag> pattern, string revOp> {
|
||||
def _e32 : VOP2 <
|
||||
op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1),
|
||||
opName#"_e32 $dst, $src0, $src1", pattern
|
||||
>, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
|
||||
|
||||
defm _e64 : VOP3_2_m <
|
||||
op,
|
||||
(outs vrc:$dst),
|
||||
(ins InputMods:$src0_modifiers, arc:$src0,
|
||||
InputMods:$src1_modifiers, arc:$src1,
|
||||
i32imm:$clamp, i32imm:$omod),
|
||||
opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod", [],
|
||||
opName, revOp>;
|
||||
}
|
||||
|
||||
multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern,
|
||||
string revOp = opName>
|
||||
: VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern, revOp>;
|
||||
|
||||
multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern,
|
||||
string revOp = opName>
|
||||
: VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern, revOp>;
|
||||
|
||||
multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern,
|
||||
RegisterClass src0_rc, string revOp = opName> {
|
||||
|
||||
def _e32 : VOP2 <
|
||||
op, (outs VReg_32:$dst), (ins src0_rc:$src0, VReg_32:$src1),
|
||||
opName#"_e32 $dst, $src0, $src1", pattern
|
||||
>, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
|
||||
|
||||
def _e64 : VOP3b <
|
||||
{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
|
||||
(outs VReg_32:$dst),
|
||||
(ins InputMods: $src0_modifiers, VSrc_32:$src0,
|
||||
InputMods:$src1_modifiers, VSrc_32:$src1,
|
||||
i32imm:$clamp, i32imm:$omod),
|
||||
opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod", []
|
||||
>, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
|
||||
let src2 = 0;
|
||||
let src2_modifiers = 0;
|
||||
/* the VOP2 variant puts the carry out into VCC, the VOP3 variant
|
||||
can write it into any SGPR. We currently don't use the carry out,
|
||||
so for now hardcode it to VCC as well */
|
||||
let sdst = SIOperand.VCC;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
|
||||
string opName, ValueType vt, PatLeaf cond, bit defExec = 0> {
|
||||
def _e32 : VOPC <
|
||||
op, (ins arc:$src0, vrc:$src1),
|
||||
opName#"_e32 $dst, $src0, $src1", []
|
||||
>, VOP <opName> {
|
||||
let Defs = !if(defExec, [EXEC], []);
|
||||
}
|
||||
|
||||
def _e64 : VOP3 <
|
||||
{0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
|
||||
(outs SReg_64:$dst),
|
||||
(ins InputMods:$src0_modifiers, arc:$src0,
|
||||
InputMods:$src1_modifiers, arc:$src1,
|
||||
InstFlag:$clamp, InstFlag:$omod),
|
||||
opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod",
|
||||
!if(!eq(!cast<string>(cond), "COND_NULL"), []<dag>,
|
||||
[(set SReg_64:$dst, (i1 (setcc (vt arc:$src0), arc:$src1, cond)))]
|
||||
)
|
||||
>, VOP <opName> {
|
||||
outs, ins, asm, opName>,
|
||||
VOP3DisableFields<1, 0, HasMods> {
|
||||
let Defs = !if(defExec, [EXEC], []);
|
||||
let src2 = 0;
|
||||
let src2_modifiers = 0;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass VOPC_32 <bits<8> op, string opName,
|
||||
ValueType vt = untyped, PatLeaf cond = COND_NULL>
|
||||
: VOPC_Helper <op, VReg_32, VSrc_32, opName, vt, cond>;
|
||||
multiclass VOP1_Helper <bits<8> op, string opName, dag outs,
|
||||
dag ins32, string asm32, list<dag> pat32,
|
||||
dag ins64, string asm64, list<dag> pat64,
|
||||
bit HasMods> {
|
||||
|
||||
multiclass VOPC_64 <bits<8> op, string opName,
|
||||
ValueType vt = untyped, PatLeaf cond = COND_NULL>
|
||||
: VOPC_Helper <op, VReg_64, VSrc_64, opName, vt, cond>;
|
||||
def _e32 : VOP1 <op, outs, ins32, opName#asm32, pat32>, VOP<opName>;
|
||||
|
||||
multiclass VOPCX_32 <bits<8> op, string opName,
|
||||
ValueType vt = untyped, PatLeaf cond = COND_NULL>
|
||||
: VOPC_Helper <op, VReg_32, VSrc_32, opName, vt, cond, 1>;
|
||||
defm _e64 : VOP3_1_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName, HasMods>;
|
||||
}
|
||||
|
||||
multiclass VOPCX_64 <bits<8> op, string opName,
|
||||
ValueType vt = untyped, PatLeaf cond = COND_NULL>
|
||||
: VOPC_Helper <op, VReg_64, VSrc_64, opName, vt, cond, 1>;
|
||||
|
||||
multiclass VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3_m <
|
||||
op, (outs VReg_32:$dst),
|
||||
(ins InputMods: $src0_modifiers, VSrc_32:$src0, InputMods:$src1_modifiers,
|
||||
VSrc_32:$src1, InputMods:$src2_modifiers, VSrc_32:$src2,
|
||||
InstFlag:$clamp, InstFlag:$omod),
|
||||
opName#" $dst, $src0_modifiers, $src1, $src2, $clamp, $omod", pattern, opName
|
||||
multiclass VOP1Inst <bits<8> op, string opName, VOPProfile P,
|
||||
SDPatternOperator node = null_frag> : VOP1_Helper <
|
||||
op, opName, P.Outs,
|
||||
P.Ins32, P.Asm32, [],
|
||||
P.Ins64, P.Asm64,
|
||||
!if(P.HasModifiers,
|
||||
[(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
|
||||
i32:$src0_modifiers, i32:$clamp, i32:$omod))))],
|
||||
[(set P.DstVT:$dst, (node P.Src0VT:$src0))]),
|
||||
P.HasModifiers
|
||||
>;
|
||||
|
||||
class VOP3_64_32 <bits <9> op, string opName, list<dag> pattern> : VOP3 <
|
||||
op, (outs VReg_64:$dst),
|
||||
(ins VSrc_64:$src0, VSrc_32:$src1),
|
||||
opName#" $dst, $src0, $src1", pattern
|
||||
>, VOP <opName> {
|
||||
class VOP2_e32 <bits<6> op, string opName, dag outs, dag ins, string asm,
|
||||
list<dag> pattern, string revOp> :
|
||||
VOP2 <op, outs, ins, opName#asm, pattern>,
|
||||
VOP <opName>,
|
||||
VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
|
||||
|
||||
let src2 = 0;
|
||||
let src2_modifiers = 0;
|
||||
let src0_modifiers = 0;
|
||||
let clamp = 0;
|
||||
let omod = 0;
|
||||
multiclass VOP2_Helper <bits<6> op, string opName, dag outs,
|
||||
dag ins32, string asm32, list<dag> pat32,
|
||||
dag ins64, string asm64, list<dag> pat64,
|
||||
string revOp, bit HasMods> {
|
||||
def _e32 : VOP2_e32 <op, opName, outs, ins32, asm32, pat32, revOp>;
|
||||
|
||||
defm _e64 : VOP3_2_m <
|
||||
{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
|
||||
outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods
|
||||
>;
|
||||
}
|
||||
|
||||
class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
|
||||
op, (outs VReg_64:$dst),
|
||||
(ins InputMods:$src0_modifiers, VSrc_64:$src0,
|
||||
InputMods:$src1_modifiers, VSrc_64:$src1,
|
||||
InputMods:$src2_modifiers, VSrc_64:$src2,
|
||||
InstFlag:$clamp, InstFlag:$omod),
|
||||
opName#" $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers, $clamp, $omod", pattern
|
||||
>, VOP <opName>;
|
||||
multiclass VOP2Inst <bits<6> op, string opName, VOPProfile P,
|
||||
SDPatternOperator node = null_frag,
|
||||
string revOp = opName> : VOP2_Helper <
|
||||
op, opName, P.Outs,
|
||||
P.Ins32, P.Asm32, [],
|
||||
P.Ins64, P.Asm64,
|
||||
!if(P.HasModifiers,
|
||||
[(set P.DstVT:$dst,
|
||||
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
|
||||
i32:$clamp, i32:$omod)),
|
||||
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
|
||||
[(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
|
||||
revOp, P.HasModifiers
|
||||
>;
|
||||
|
||||
multiclass VOP2b_Helper <bits<6> op, string opName, dag outs,
|
||||
dag ins32, string asm32, list<dag> pat32,
|
||||
dag ins64, string asm64, list<dag> pat64,
|
||||
string revOp, bit HasMods> {
|
||||
|
||||
def _e32 : VOP2_e32 <op, opName, outs, ins32, asm32, pat32, revOp>;
|
||||
|
||||
defm _e64 : VOP3b_2_m <
|
||||
{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
|
||||
outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods
|
||||
>;
|
||||
}
|
||||
|
||||
multiclass VOP2bInst <bits<6> op, string opName, VOPProfile P,
|
||||
SDPatternOperator node = null_frag,
|
||||
string revOp = opName> : VOP2b_Helper <
|
||||
op, opName, P.Outs,
|
||||
P.Ins32, P.Asm32, [],
|
||||
P.Ins64, P.Asm64,
|
||||
!if(P.HasModifiers,
|
||||
[(set P.DstVT:$dst,
|
||||
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
|
||||
i32:$clamp, i32:$omod)),
|
||||
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
|
||||
[(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
|
||||
revOp, P.HasModifiers
|
||||
>;
|
||||
|
||||
multiclass VOPC_Helper <bits<8> op, string opName,
|
||||
dag ins32, string asm32, list<dag> pat32,
|
||||
dag out64, dag ins64, string asm64, list<dag> pat64,
|
||||
bit HasMods, bit DefExec> {
|
||||
def _e32 : VOPC <op, ins32, opName#asm32, pat32>, VOP <opName> {
|
||||
let Defs = !if(DefExec, [EXEC], []);
|
||||
}
|
||||
|
||||
defm _e64 : VOP3_C_m <op, out64, ins64, opName#"_e64"#asm64, pat64, opName,
|
||||
HasMods, DefExec>;
|
||||
}
|
||||
|
||||
multiclass VOPCInst <bits<8> op, string opName,
|
||||
VOPProfile P, PatLeaf cond = COND_NULL,
|
||||
bit DefExec = 0> : VOPC_Helper <
|
||||
op, opName,
|
||||
P.Ins32, P.Asm32, [],
|
||||
(outs SReg_64:$dst), P.Ins64, P.Asm64,
|
||||
!if(P.HasModifiers,
|
||||
[(set i1:$dst,
|
||||
(setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
|
||||
i32:$clamp, i32:$omod)),
|
||||
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
|
||||
cond))],
|
||||
[(set i1:$dst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]),
|
||||
P.HasModifiers, DefExec
|
||||
>;
|
||||
|
||||
multiclass VOPC_F32 <bits<8> op, string opName, PatLeaf cond = COND_NULL> :
|
||||
VOPCInst <op, opName, VOP_F32_F32_F32, cond>;
|
||||
|
||||
multiclass VOPC_F64 <bits<8> op, string opName, PatLeaf cond = COND_NULL> :
|
||||
VOPCInst <op, opName, VOP_F64_F64_F64, cond>;
|
||||
|
||||
multiclass VOPC_I32 <bits<8> op, string opName, PatLeaf cond = COND_NULL> :
|
||||
VOPCInst <op, opName, VOP_I32_I32_I32, cond>;
|
||||
|
||||
multiclass VOPC_I64 <bits<8> op, string opName, PatLeaf cond = COND_NULL> :
|
||||
VOPCInst <op, opName, VOP_I64_I64_I64, cond>;
|
||||
|
||||
|
||||
class VOP3b_Helper <bits<9> op, RegisterClass vrc, RegisterClass arc,
|
||||
string opName, list<dag> pattern> : VOP3 <
|
||||
multiclass VOPCX <bits<8> op, string opName, VOPProfile P,
|
||||
PatLeaf cond = COND_NULL>
|
||||
: VOPCInst <op, opName, P, cond, 1>;
|
||||
|
||||
multiclass VOPCX_F32 <bits<8> op, string opName, PatLeaf cond = COND_NULL> :
|
||||
VOPCX <op, opName, VOP_F32_F32_F32, cond>;
|
||||
|
||||
multiclass VOPCX_F64 <bits<8> op, string opName, PatLeaf cond = COND_NULL> :
|
||||
VOPCX <op, opName, VOP_F64_F64_F64, cond>;
|
||||
|
||||
multiclass VOPCX_I32 <bits<8> op, string opName, PatLeaf cond = COND_NULL> :
|
||||
VOPCX <op, opName, VOP_I32_I32_I32, cond>;
|
||||
|
||||
multiclass VOPCX_I64 <bits<8> op, string opName, PatLeaf cond = COND_NULL> :
|
||||
VOPCX <op, opName, VOP_I64_I64_I64, cond>;
|
||||
|
||||
multiclass VOP3_Helper <bits<9> op, string opName, dag outs, dag ins, string asm,
|
||||
list<dag> pat, int NumSrcArgs, bit HasMods> : VOP3_m <
|
||||
op, outs, ins, opName#asm, pat, opName, NumSrcArgs, HasMods
|
||||
>;
|
||||
|
||||
multiclass VOP3Inst <bits<9> op, string opName, VOPProfile P,
|
||||
SDPatternOperator node = null_frag> : VOP3_Helper <
|
||||
op, opName, P.Outs, P.Ins64, P.Asm64,
|
||||
!if(!eq(P.NumSrcArgs, 3),
|
||||
!if(P.HasModifiers,
|
||||
[(set P.DstVT:$dst,
|
||||
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
|
||||
i32:$clamp, i32:$omod)),
|
||||
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
|
||||
(P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))],
|
||||
[(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1,
|
||||
P.Src2VT:$src2))]),
|
||||
!if(!eq(P.NumSrcArgs, 2),
|
||||
!if(P.HasModifiers,
|
||||
[(set P.DstVT:$dst,
|
||||
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
|
||||
i32:$clamp, i32:$omod)),
|
||||
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
|
||||
[(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))])
|
||||
/* P.NumSrcArgs == 1 */,
|
||||
!if(P.HasModifiers,
|
||||
[(set P.DstVT:$dst,
|
||||
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
|
||||
i32:$clamp, i32:$omod))))],
|
||||
[(set P.DstVT:$dst, (node P.Src0VT:$src0))]))),
|
||||
P.NumSrcArgs, P.HasModifiers
|
||||
>;
|
||||
|
||||
multiclass VOP3b_Helper <bits<9> op, RegisterClass vrc, RegisterClass arc,
|
||||
string opName, list<dag> pattern> :
|
||||
VOP3b_2_m <
|
||||
op, (outs vrc:$dst0, SReg_64:$dst1),
|
||||
(ins arc:$src0, arc:$src1, arc:$src2,
|
||||
InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
|
||||
opName#" $dst0, $dst1, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
|
||||
>, VOP <opName>;
|
||||
opName#" $dst0, $dst1, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern,
|
||||
opName, opName, 1, 1
|
||||
>;
|
||||
|
||||
|
||||
class VOP3b_64 <bits<9> op, string opName, list<dag> pattern> :
|
||||
multiclass VOP3b_64 <bits<9> op, string opName, list<dag> pattern> :
|
||||
VOP3b_Helper <op, VReg_64, VSrc_64, opName, pattern>;
|
||||
|
||||
class VOP3b_32 <bits<9> op, string opName, list<dag> pattern> :
|
||||
multiclass VOP3b_32 <bits<9> op, string opName, list<dag> pattern> :
|
||||
VOP3b_Helper <op, VReg_32, VSrc_32, opName, pattern>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -136,11 +136,7 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
|
|||
SrcRC == &AMDGPU::VReg_1RegClass) {
|
||||
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_I32_e64))
|
||||
.addOperand(MI.getOperand(0))
|
||||
.addImm(0)
|
||||
.addOperand(MI.getOperand(1))
|
||||
.addImm(0)
|
||||
.addImm(0)
|
||||
.addImm(0)
|
||||
.addImm(0);
|
||||
MI.eraseFromParent();
|
||||
}
|
||||
|
|
|
@ -93,7 +93,7 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
|
|||
const MachineOperand *Src1Mod =
|
||||
TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
|
||||
|
||||
if (Src1 && (!isVGPR(Src1, TRI, MRI) || Src1Mod->getImm() != 0))
|
||||
if (Src1 && (!isVGPR(Src1, TRI, MRI) || (Src1Mod && Src1Mod->getImm() != 0)))
|
||||
return false;
|
||||
|
||||
// We don't need to check src0, all input types are legal, so just make
|
||||
|
|
|
@ -50,8 +50,9 @@ entry:
|
|||
}
|
||||
|
||||
; SI-CHECK-LABEL: @fabs_fold
|
||||
; SI-CHECK: S_LOAD_DWORD [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
|
||||
; SI-CHECK-NOT: V_AND_B32_e32
|
||||
; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, |v{{[0-9]+}}|
|
||||
; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
|
||||
define void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) {
|
||||
entry:
|
||||
%0 = call float @fabs(float %in0)
|
||||
|
|
|
@ -61,8 +61,9 @@ entry:
|
|||
}
|
||||
|
||||
; SI-CHECK-LABEL: @fneg_fold
|
||||
; SI-CHECK: S_LOAD_DWORD [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
|
||||
; SI-CHECK-NOT: V_XOR_B32
|
||||
; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
|
||||
; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], v{{[0-9]+}}
|
||||
define void @fneg_fold(float addrspace(1)* %out, float %in) {
|
||||
entry:
|
||||
%0 = fsub float -0.0, %in
|
||||
|
|
|
@ -20,8 +20,8 @@ declare void @llvm.AMDGPU.store.output(float, i32)
|
|||
; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, -KC0[3].Z
|
||||
; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y
|
||||
; SI-CHECK: @fsub_v2f32
|
||||
; SI-CHECK: V_SUB_F32
|
||||
; SI-CHECK: V_SUB_F32
|
||||
; SI-CHECK: V_SUBREV_F32
|
||||
; SI-CHECK: V_SUBREV_F32
|
||||
define void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
|
||||
entry:
|
||||
%0 = fsub <2 x float> %a, %b
|
||||
|
@ -35,10 +35,10 @@ entry:
|
|||
; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
|
||||
; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
|
||||
; SI-CHECK: @fsub_v4f32
|
||||
; SI-CHECK: V_SUB_F32
|
||||
; SI-CHECK: V_SUB_F32
|
||||
; SI-CHECK: V_SUB_F32
|
||||
; SI-CHECK: V_SUB_F32
|
||||
; SI-CHECK: V_SUBREV_F32
|
||||
; SI-CHECK: V_SUBREV_F32
|
||||
; SI-CHECK: V_SUBREV_F32
|
||||
; SI-CHECK: V_SUBREV_F32
|
||||
define void @fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
|
||||
%a = load <4 x float> addrspace(1) * %in
|
||||
|
|
|
@ -23,7 +23,7 @@ entry:
|
|||
; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
|
||||
; EG: 16
|
||||
; SI: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
|
||||
; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 16,
|
||||
; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 16
|
||||
define void @i16_mul24(i32 addrspace(1)* %out, i16 %a, i16 %b) {
|
||||
entry:
|
||||
%0 = mul i16 %a, %b
|
||||
|
@ -37,7 +37,7 @@ entry:
|
|||
; The result must be sign-extended
|
||||
; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
|
||||
; SI: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
|
||||
; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8,
|
||||
; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8
|
||||
|
||||
define void @i8_mul24(i32 addrspace(1)* %out, i8 %a, i8 %b) {
|
||||
entry:
|
||||
|
|
|
@ -1,9 +1,4 @@
|
|||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; XXX: This testis for a bug in the SIShrinkInstruction pass and it will be
|
||||
; relevant once we are selecting 64-bit instructions. We are
|
||||
; currently selecting mostly 32-bit instruction, so the
|
||||
; SIShrinkInstructions pass isn't doing much.
|
||||
; XFAIL: *
|
||||
|
||||
; Test that we correctly commute a sub instruction
|
||||
; FUNC-LABEL: @sub_rev
|
||||
|
|
Loading…
Reference in New Issue