forked from OSchip/llvm-project
Revert r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"
This broke the Chromium build, causing it to fail with e.g. "fatal error: error in backend: Cannot select: t362: v4i32 = X86ISD::VSHLI t392, Constant:i8<15>". See the llvm-commits thread of r372285 for details. This also reverts r372286, r372287, r372288, r372289, r372290, r372291, r372292, r372293, r372296, and r372297, which seemed to depend on the main commit. > Encode them directly as an imm argument to G_INTRINSIC*. > > Since intrinsics can now define which parameters are required to be > immediates, avoid using registers for them. Intrinsics could > potentially want a constant that isn't a legal register type. Also, > since G_CONSTANT is subject to CSE and legalization, transforms could > potentially obscure the value (and create extra work for the > selector). The register bank of a G_CONSTANT is also meaningful, so > this could throw off future folding and legalization logic for AMDGPU. > > This will be much more convenient to work with than needing to call > getConstantVRegVal and checking if it may have failed for every > constant intrinsic parameter. AMDGPU has quite a lot of intrinsics with > immarg operands, many of which need inspection during lowering. Having > to find the value in a register is going to add a lot of boilerplate > and waste compile time. > > SelectionDAG has always provided TargetConstant for constants which > should not be legalized or materialized in a register. The distinction > between Constant and TargetConstant was somewhat fuzzy, and there was > no automatic way to force usage of TargetConstant for certain > intrinsic parameters. They were both ultimately ConstantSDNode, and it > was inconsistently used. It was quite easy to mis-select an > instruction requiring an immediate. For SelectionDAG, start emitting > TargetConstant for these arguments, and use timm to match them. > > Most of the work here is to clean up target handling of constants. 
Some > targets process intrinsics through intermediate custom nodes, which > need to preserve TargetConstant usage to match the intrinsic > expectation. Pattern inputs now need to distinguish whether a constant > is merely compatible with an operand or whether it is mandatory. > > The GlobalISelEmitter needs to treat timm as a special case of a leaf > node, similar to MachineBasicBlock operands. This should also enable > handling of patterns for some G_* instructions with immediates, like > G_FENCE or G_EXTRACT. > > This does include a workaround for a crash in GlobalISelEmitter when > ARM tries to use "imm" in an output with a "timm" pattern source. llvm-svn: 372314
This commit is contained in:
parent
0cfb78e52a
commit
13bdae8541
|
@ -220,11 +220,6 @@ enum {
|
|||
/// - OpIdx - Operand index
|
||||
GIM_CheckIsMBB,
|
||||
|
||||
/// Check the specified operand is an Imm
|
||||
/// - InsnID - Instruction ID
|
||||
/// - OpIdx - Operand index
|
||||
GIM_CheckIsImm,
|
||||
|
||||
/// Check if the specified operand is safe to fold into the current
|
||||
/// instruction.
|
||||
/// - InsnID - Instruction ID
|
||||
|
|
|
@ -690,19 +690,7 @@ bool InstructionSelector::executeMatchTable(
|
|||
}
|
||||
break;
|
||||
}
|
||||
case GIM_CheckIsImm: {
|
||||
int64_t InsnID = MatchTable[CurrentIdx++];
|
||||
int64_t OpIdx = MatchTable[CurrentIdx++];
|
||||
DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
|
||||
dbgs() << CurrentIdx << ": GIM_CheckIsImm(MIs[" << InsnID
|
||||
<< "]->getOperand(" << OpIdx << "))\n");
|
||||
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
|
||||
if (!State.MIs[InsnID]->getOperand(OpIdx).isImm()) {
|
||||
if (handleReject() == RejectAndGiveUp)
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case GIM_CheckIsSafeToFold: {
|
||||
int64_t InsnID = MatchTable[CurrentIdx++];
|
||||
DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
|
||||
|
|
|
@ -374,9 +374,6 @@ namespace llvm {
|
|||
/// Returns a mask for which lanes get read/written by the given (register)
|
||||
/// machine operand.
|
||||
LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;
|
||||
|
||||
/// Returns true if the def register in \p MO has no uses.
|
||||
bool deadDefHasNoUse(const MachineOperand &MO);
|
||||
};
|
||||
|
||||
/// Creates a new SUnit and return a ptr to it.
|
||||
|
|
|
@ -848,11 +848,6 @@ class ImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm,
|
|||
bit IsAPFloat = 0;
|
||||
}
|
||||
|
||||
// Convenience wrapper for ImmLeaf to use timm/TargetConstant instead
|
||||
// of imm/Constant.
|
||||
class TImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm,
|
||||
SDNode ImmNode = timm> : ImmLeaf<vt, pred, xform, ImmNode>;
|
||||
|
||||
// An ImmLeaf except that Imm is an APInt. This is useful when you need to
|
||||
// zero-extend the immediate instead of sign-extend it.
|
||||
//
|
||||
|
|
|
@ -1617,29 +1617,14 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
|
|||
if (isa<FPMathOperator>(CI))
|
||||
MIB->copyIRFlags(CI);
|
||||
|
||||
for (auto &Arg : enumerate(CI.arg_operands())) {
|
||||
for (auto &Arg : CI.arg_operands()) {
|
||||
// Some intrinsics take metadata parameters. Reject them.
|
||||
if (isa<MetadataAsValue>(Arg.value()))
|
||||
if (isa<MetadataAsValue>(Arg))
|
||||
return false;
|
||||
|
||||
// If this is required to be an immediate, don't materialize it in a
|
||||
// register.
|
||||
if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
|
||||
if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg.value())) {
|
||||
// imm arguments are more convenient than cimm (and realistically
|
||||
// probably sufficient), so use them.
|
||||
assert(CI->getBitWidth() <= 64 &&
|
||||
"large intrinsic immediates not handled");
|
||||
MIB.addImm(CI->getSExtValue());
|
||||
} else {
|
||||
MIB.addFPImm(cast<ConstantFP>(Arg.value()));
|
||||
}
|
||||
} else {
|
||||
ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value());
|
||||
if (VRegs.size() > 1)
|
||||
return false;
|
||||
MIB.addUse(VRegs[0]);
|
||||
}
|
||||
ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg);
|
||||
if (VRegs.size() > 1)
|
||||
return false;
|
||||
MIB.addUse(VRegs[0]);
|
||||
}
|
||||
|
||||
// Add a MachineMemOperand if it is a target mem intrinsic.
|
||||
|
|
|
@ -373,13 +373,6 @@ LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
|
|||
return TRI->getSubRegIndexLaneMask(SubReg);
|
||||
}
|
||||
|
||||
bool ScheduleDAGInstrs::deadDefHasNoUse(const MachineOperand &MO) {
|
||||
auto RegUse = CurrentVRegUses.find(MO.getReg());
|
||||
if (RegUse == CurrentVRegUses.end())
|
||||
return true;
|
||||
return (RegUse->LaneMask & getLaneMaskForMO(MO)).none();
|
||||
}
|
||||
|
||||
/// Adds register output and data dependencies from this SUnit to instructions
|
||||
/// that occur later in the same scheduling region if they read from or write to
|
||||
/// the virtual register defined at OperIdx.
|
||||
|
@ -409,7 +402,8 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
|
|||
}
|
||||
|
||||
if (MO.isDead()) {
|
||||
assert(deadDefHasNoUse(MO) && "Dead defs should have no uses");
|
||||
assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() &&
|
||||
"Dead defs should have no uses");
|
||||
} else {
|
||||
// Add data dependence to all uses we found so far.
|
||||
const TargetSubtargetInfo &ST = MF.getSubtarget();
|
||||
|
|
|
@ -4768,22 +4768,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
|
|||
|
||||
// Add all operands of the call to the operand list.
|
||||
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
|
||||
const Value *Arg = I.getArgOperand(i);
|
||||
if (!I.paramHasAttr(i, Attribute::ImmArg)) {
|
||||
Ops.push_back(getValue(Arg));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Use TargetConstant instead of a regular constant for immarg.
|
||||
EVT VT = TLI.getValueType(*DL, Arg->getType(), true);
|
||||
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) {
|
||||
assert(CI->getBitWidth() <= 64 &&
|
||||
"large intrinsic immediates not handled");
|
||||
Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT));
|
||||
} else {
|
||||
Ops.push_back(
|
||||
DAG.getTargetConstantFP(*cast<ConstantFP>(Arg), SDLoc(), VT));
|
||||
}
|
||||
SDValue Op = getValue(I.getArgOperand(i));
|
||||
Ops.push_back(Op);
|
||||
}
|
||||
|
||||
SmallVector<EVT, 4> ValueVTs;
|
||||
|
|
|
@ -685,11 +685,11 @@ def logical_imm64_not : Operand<i64> {
|
|||
|
||||
// iXX_imm0_65535 predicates - True if the immediate is in the range [0,65535].
|
||||
let ParserMatchClass = AsmImmRange<0, 65535>, PrintMethod = "printImmHex" in {
|
||||
def i32_imm0_65535 : Operand<i32>, TImmLeaf<i32, [{
|
||||
def i32_imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
|
||||
return ((uint32_t)Imm) < 65536;
|
||||
}]>;
|
||||
|
||||
def i64_imm0_65535 : Operand<i64>, TImmLeaf<i64, [{
|
||||
def i64_imm0_65535 : Operand<i64>, ImmLeaf<i64, [{
|
||||
return ((uint64_t)Imm) < 65536;
|
||||
}]>;
|
||||
}
|
||||
|
|
|
@ -798,7 +798,7 @@ def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
|
|||
let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in {
|
||||
def HWASAN_CHECK_MEMACCESS : Pseudo<
|
||||
(outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
|
||||
[(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
|
||||
[(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 imm:$accessinfo))]>,
|
||||
Sched<[]>;
|
||||
}
|
||||
|
||||
|
|
|
@ -22,7 +22,6 @@
|
|||
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
|
||||
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
|
||||
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
|
||||
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
|
||||
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
|
||||
#include "llvm/CodeGen/GlobalISel/Utils.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
|
@ -246,6 +245,10 @@ AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
|
|||
}
|
||||
}
|
||||
|
||||
static int64_t getConstant(const MachineInstr *MI) {
|
||||
return MI->getOperand(1).getCImm()->getSExtValue();
|
||||
}
|
||||
|
||||
static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
|
||||
switch (Opc) {
|
||||
case AMDGPU::G_AND:
|
||||
|
@ -734,260 +737,6 @@ buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
|
|||
.addImm(Enabled);
|
||||
}
|
||||
|
||||
static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
|
||||
int64_t C;
|
||||
if (mi_match(Reg, MRI, m_ICst(C)) && C == 0)
|
||||
return true;
|
||||
|
||||
// FIXME: matcher should ignore copies
|
||||
return mi_match(Reg, MRI, m_Copy(m_ICst(C))) && C == 0;
|
||||
}
|
||||
|
||||
static unsigned extractGLC(unsigned CachePolicy) {
|
||||
return CachePolicy & 1;
|
||||
}
|
||||
|
||||
static unsigned extractSLC(unsigned CachePolicy) {
|
||||
return (CachePolicy >> 1) & 1;
|
||||
}
|
||||
|
||||
static unsigned extractDLC(unsigned CachePolicy) {
|
||||
return (CachePolicy >> 2) & 1;
|
||||
}
|
||||
|
||||
// Returns Base register, constant offset, and offset def point.
|
||||
static std::tuple<Register, unsigned, MachineInstr *>
|
||||
getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
|
||||
MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
|
||||
if (!Def)
|
||||
return {Reg, 0, nullptr};
|
||||
|
||||
if (Def->getOpcode() == AMDGPU::G_CONSTANT) {
|
||||
unsigned Offset;
|
||||
const MachineOperand &Op = Def->getOperand(1);
|
||||
if (Op.isImm())
|
||||
Offset = Op.getImm();
|
||||
else
|
||||
Offset = Op.getCImm()->getZExtValue();
|
||||
|
||||
return {Register(), Offset, Def};
|
||||
}
|
||||
|
||||
int64_t Offset;
|
||||
if (Def->getOpcode() == AMDGPU::G_ADD) {
|
||||
// TODO: Handle G_OR used for add case
|
||||
if (mi_match(Def->getOperand(1).getReg(), MRI, m_ICst(Offset)))
|
||||
return {Def->getOperand(0).getReg(), Offset, Def};
|
||||
|
||||
// FIXME: matcher should ignore copies
|
||||
if (mi_match(Def->getOperand(1).getReg(), MRI, m_Copy(m_ICst(Offset))))
|
||||
return {Def->getOperand(0).getReg(), Offset, Def};
|
||||
}
|
||||
|
||||
return {Reg, 0, Def};
|
||||
}
|
||||
|
||||
static unsigned getBufferStoreOpcode(LLT Ty,
|
||||
const unsigned MemSize,
|
||||
const bool Offen) {
|
||||
const int Size = Ty.getSizeInBits();
|
||||
switch (8 * MemSize) {
|
||||
case 8:
|
||||
return Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact :
|
||||
AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact;
|
||||
case 16:
|
||||
return Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact :
|
||||
AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact;
|
||||
default:
|
||||
unsigned Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact :
|
||||
AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact;
|
||||
if (Size > 32)
|
||||
Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32);
|
||||
return Opc;
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned getBufferStoreFormatOpcode(LLT Ty,
|
||||
const unsigned MemSize,
|
||||
const bool Offen) {
|
||||
bool IsD16Packed = Ty.getScalarSizeInBits() == 16;
|
||||
bool IsD16Unpacked = 8 * MemSize < Ty.getSizeInBits();
|
||||
int NumElts = Ty.isVector() ? Ty.getNumElements() : 1;
|
||||
|
||||
if (IsD16Packed) {
|
||||
switch (NumElts) {
|
||||
case 1:
|
||||
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact :
|
||||
AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact;
|
||||
case 2:
|
||||
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact :
|
||||
AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFSET_exact;
|
||||
case 3:
|
||||
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFEN_exact :
|
||||
AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFSET_exact;
|
||||
case 4:
|
||||
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact :
|
||||
AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFSET_exact;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (IsD16Unpacked) {
|
||||
switch (NumElts) {
|
||||
case 1:
|
||||
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact :
|
||||
AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact;
|
||||
case 2:
|
||||
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact :
|
||||
AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFSET_exact;
|
||||
case 3:
|
||||
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFEN_exact :
|
||||
AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFSET_exact;
|
||||
case 4:
|
||||
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact :
|
||||
AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFSET_exact;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
switch (NumElts) {
|
||||
case 1:
|
||||
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_X_OFFEN_exact :
|
||||
AMDGPU::BUFFER_STORE_FORMAT_X_OFFSET_exact;
|
||||
case 2:
|
||||
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XY_OFFEN_exact :
|
||||
AMDGPU::BUFFER_STORE_FORMAT_XY_OFFSET_exact;
|
||||
case 3:
|
||||
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFEN_exact :
|
||||
AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFSET_exact;
|
||||
case 4:
|
||||
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFEN_exact :
|
||||
AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFSET_exact;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
|
||||
llvm_unreachable("unhandled buffer store");
|
||||
}
|
||||
|
||||
// TODO: Move this to combiner
|
||||
// Returns base register, imm offset, total constant offset.
|
||||
std::tuple<Register, unsigned, unsigned>
|
||||
AMDGPUInstructionSelector::splitBufferOffsets(MachineIRBuilder &B,
|
||||
Register OrigOffset) const {
|
||||
const unsigned MaxImm = 4095;
|
||||
Register BaseReg;
|
||||
unsigned TotalConstOffset;
|
||||
MachineInstr *OffsetDef;
|
||||
MachineRegisterInfo &MRI = *B.getMRI();
|
||||
|
||||
std::tie(BaseReg, TotalConstOffset, OffsetDef)
|
||||
= getBaseWithConstantOffset(MRI, OrigOffset);
|
||||
|
||||
unsigned ImmOffset = TotalConstOffset;
|
||||
|
||||
// If the immediate value is too big for the immoffset field, put the value
|
||||
// and -4096 into the immoffset field so that the value that is copied/added
|
||||
// for the voffset field is a multiple of 4096, and it stands more chance
|
||||
// of being CSEd with the copy/add for another similar load/store.f
|
||||
// However, do not do that rounding down to a multiple of 4096 if that is a
|
||||
// negative number, as it appears to be illegal to have a negative offset
|
||||
// in the vgpr, even if adding the immediate offset makes it positive.
|
||||
unsigned Overflow = ImmOffset & ~MaxImm;
|
||||
ImmOffset -= Overflow;
|
||||
if ((int32_t)Overflow < 0) {
|
||||
Overflow += ImmOffset;
|
||||
ImmOffset = 0;
|
||||
}
|
||||
|
||||
if (Overflow != 0) {
|
||||
// In case this is in a waterfall loop, insert offset code at the def point
|
||||
// of the offset, not inside the loop.
|
||||
MachineBasicBlock::iterator OldInsPt = B.getInsertPt();
|
||||
MachineBasicBlock &OldMBB = B.getMBB();
|
||||
B.setInstr(*OffsetDef);
|
||||
|
||||
if (!BaseReg) {
|
||||
BaseReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
|
||||
B.buildInstr(AMDGPU::V_MOV_B32_e32)
|
||||
.addDef(BaseReg)
|
||||
.addImm(Overflow);
|
||||
} else {
|
||||
Register OverflowVal = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
|
||||
B.buildInstr(AMDGPU::V_MOV_B32_e32)
|
||||
.addDef(OverflowVal)
|
||||
.addImm(Overflow);
|
||||
|
||||
Register NewBaseReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
|
||||
TII.getAddNoCarry(B.getMBB(), B.getInsertPt(), B.getDebugLoc(), NewBaseReg)
|
||||
.addReg(BaseReg)
|
||||
.addReg(OverflowVal, RegState::Kill)
|
||||
.addImm(0);
|
||||
BaseReg = NewBaseReg;
|
||||
}
|
||||
|
||||
B.setInsertPt(OldMBB, OldInsPt);
|
||||
}
|
||||
|
||||
return {BaseReg, ImmOffset, TotalConstOffset};
|
||||
}
|
||||
|
||||
bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr &MI,
|
||||
bool IsFormat) const {
|
||||
MachineIRBuilder B(MI);
|
||||
MachineRegisterInfo &MRI = *B.getMRI();
|
||||
MachineFunction &MF = B.getMF();
|
||||
Register VData = MI.getOperand(1).getReg();
|
||||
LLT Ty = MRI.getType(VData);
|
||||
|
||||
int Size = Ty.getSizeInBits();
|
||||
if (Size % 32 != 0)
|
||||
return false;
|
||||
|
||||
// FIXME: Verifier should enforce 1 MMO for these intrinsics.
|
||||
MachineMemOperand *MMO = *MI.memoperands_begin();
|
||||
const int MemSize = MMO->getSize();
|
||||
|
||||
Register RSrc = MI.getOperand(2).getReg();
|
||||
Register VOffset = MI.getOperand(3).getReg();
|
||||
Register SOffset = MI.getOperand(4).getReg();
|
||||
unsigned CachePolicy = MI.getOperand(5).getImm();
|
||||
unsigned ImmOffset;
|
||||
unsigned TotalOffset;
|
||||
|
||||
std::tie(VOffset, ImmOffset, TotalOffset) = splitBufferOffsets(B, VOffset);
|
||||
if (TotalOffset != 0)
|
||||
MMO = MF.getMachineMemOperand(MMO, TotalOffset, MemSize);
|
||||
|
||||
const bool Offen = !isZero(VOffset, MRI);
|
||||
|
||||
int Opc = IsFormat ? getBufferStoreFormatOpcode(Ty, MemSize, Offen) :
|
||||
getBufferStoreOpcode(Ty, MemSize, Offen);
|
||||
if (Opc == -1)
|
||||
return false;
|
||||
|
||||
MachineInstrBuilder MIB = B.buildInstr(Opc)
|
||||
.addUse(VData);
|
||||
|
||||
if (Offen)
|
||||
MIB.addUse(VOffset);
|
||||
|
||||
MIB.addUse(RSrc)
|
||||
.addUse(SOffset)
|
||||
.addImm(ImmOffset)
|
||||
.addImm(extractGLC(CachePolicy))
|
||||
.addImm(extractSLC(CachePolicy))
|
||||
.addImm(0) // tfe: FIXME: Remove from inst
|
||||
.addImm(extractDLC(CachePolicy))
|
||||
.addMemOperand(MMO);
|
||||
|
||||
MI.eraseFromParent();
|
||||
|
||||
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
|
||||
}
|
||||
|
||||
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
|
||||
MachineInstr &I) const {
|
||||
MachineBasicBlock *BB = I.getParent();
|
||||
|
@ -997,10 +746,10 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
|
|||
unsigned IntrinsicID = I.getIntrinsicID();
|
||||
switch (IntrinsicID) {
|
||||
case Intrinsic::amdgcn_exp: {
|
||||
int64_t Tgt = I.getOperand(1).getImm();
|
||||
int64_t Enabled = I.getOperand(2).getImm();
|
||||
int64_t Done = I.getOperand(7).getImm();
|
||||
int64_t VM = I.getOperand(8).getImm();
|
||||
int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
|
||||
int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
|
||||
int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
|
||||
int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));
|
||||
|
||||
MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
|
||||
I.getOperand(4).getReg(),
|
||||
|
@ -1013,13 +762,13 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
|
|||
}
|
||||
case Intrinsic::amdgcn_exp_compr: {
|
||||
const DebugLoc &DL = I.getDebugLoc();
|
||||
int64_t Tgt = I.getOperand(1).getImm();
|
||||
int64_t Enabled = I.getOperand(2).getImm();
|
||||
int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
|
||||
int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
|
||||
Register Reg0 = I.getOperand(3).getReg();
|
||||
Register Reg1 = I.getOperand(4).getReg();
|
||||
Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
|
||||
int64_t Done = I.getOperand(5).getImm();
|
||||
int64_t VM = I.getOperand(6).getImm();
|
||||
int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
|
||||
int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));
|
||||
|
||||
BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
|
||||
MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
|
||||
|
@ -1042,10 +791,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
|
|||
MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
|
||||
return true;
|
||||
}
|
||||
case Intrinsic::amdgcn_raw_buffer_store:
|
||||
return selectStoreIntrinsic(I, false);
|
||||
case Intrinsic::amdgcn_raw_buffer_store_format:
|
||||
return selectStoreIntrinsic(I, true);
|
||||
default:
|
||||
return selectImpl(I, *CoverageInfo);
|
||||
}
|
||||
|
|
|
@ -35,7 +35,6 @@ class AMDGPUInstrInfo;
|
|||
class AMDGPURegisterBankInfo;
|
||||
class GCNSubtarget;
|
||||
class MachineInstr;
|
||||
class MachineIRBuilder;
|
||||
class MachineOperand;
|
||||
class MachineRegisterInfo;
|
||||
class SIInstrInfo;
|
||||
|
@ -83,12 +82,6 @@ private:
|
|||
bool selectG_IMPLICIT_DEF(MachineInstr &I) const;
|
||||
bool selectG_INSERT(MachineInstr &I) const;
|
||||
bool selectG_INTRINSIC(MachineInstr &I) const;
|
||||
|
||||
std::tuple<Register, unsigned, unsigned>
|
||||
splitBufferOffsets(MachineIRBuilder &B, Register OrigOffset) const;
|
||||
|
||||
bool selectStoreIntrinsic(MachineInstr &MI, bool IsFormat) const;
|
||||
|
||||
bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const;
|
||||
int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const;
|
||||
bool selectG_ICMP(MachineInstr &I) const;
|
||||
|
|
|
@ -1751,62 +1751,6 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI,
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Handle register layout difference for f16 images for some subtargets.
|
||||
Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
|
||||
MachineRegisterInfo &MRI,
|
||||
Register Reg) const {
|
||||
if (!ST.hasUnpackedD16VMem())
|
||||
return Reg;
|
||||
|
||||
const LLT S16 = LLT::scalar(16);
|
||||
const LLT S32 = LLT::scalar(32);
|
||||
LLT StoreVT = MRI.getType(Reg);
|
||||
assert(StoreVT.isVector() && StoreVT.getElementType() == S16);
|
||||
|
||||
auto Unmerge = B.buildUnmerge(S16, Reg);
|
||||
|
||||
SmallVector<Register, 4> WideRegs;
|
||||
for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
|
||||
WideRegs.push_back(B.buildAnyExt(S32, Unmerge.getReg(I)).getReg(0));
|
||||
|
||||
int NumElts = StoreVT.getNumElements();
|
||||
|
||||
return B.buildBuildVector(LLT::vector(NumElts, S32), WideRegs).getReg(0);
|
||||
}
|
||||
|
||||
bool AMDGPULegalizerInfo::legalizeRawBufferStore(MachineInstr &MI,
|
||||
MachineRegisterInfo &MRI,
|
||||
MachineIRBuilder &B,
|
||||
bool IsFormat) const {
|
||||
// TODO: Reject f16 format on targets where unsupported.
|
||||
Register VData = MI.getOperand(1).getReg();
|
||||
LLT Ty = MRI.getType(VData);
|
||||
|
||||
B.setInstr(MI);
|
||||
|
||||
const LLT S32 = LLT::scalar(32);
|
||||
const LLT S16 = LLT::scalar(16);
|
||||
|
||||
// Fixup illegal register types for i8 stores.
|
||||
if (Ty == LLT::scalar(8) || Ty == S16) {
|
||||
Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0);
|
||||
MI.getOperand(1).setReg(AnyExt);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (Ty.isVector()) {
|
||||
if (Ty.getElementType() == S16 && Ty.getNumElements() <= 4) {
|
||||
if (IsFormat)
|
||||
MI.getOperand(1).setReg(handleD16VData(B, MRI, VData));
|
||||
return true;
|
||||
}
|
||||
|
||||
return Ty.getElementType() == S32 && Ty.getNumElements() <= 4;
|
||||
}
|
||||
|
||||
return Ty == S32;
|
||||
}
|
||||
|
||||
bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
|
||||
MachineRegisterInfo &MRI,
|
||||
MachineIRBuilder &B) const {
|
||||
|
@ -1899,10 +1843,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
|
|||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
case Intrinsic::amdgcn_raw_buffer_store:
|
||||
return legalizeRawBufferStore(MI, MRI, B, false);
|
||||
case Intrinsic::amdgcn_raw_buffer_store_format:
|
||||
return legalizeRawBufferStore(MI, MRI, B, true);
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -83,11 +83,6 @@ public:
|
|||
MachineIRBuilder &B) const;
|
||||
bool legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||
MachineIRBuilder &B, unsigned AddrSpace) const;
|
||||
|
||||
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
|
||||
Register Reg) const;
|
||||
bool legalizeRawBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||
MachineIRBuilder &B, bool IsFormat) const;
|
||||
bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||
MachineIRBuilder &B) const override;
|
||||
|
||||
|
|
|
@ -20,7 +20,6 @@
|
|||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
|
||||
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
|
||||
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
|
||||
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
|
||||
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
|
||||
#include "llvm/CodeGen/TargetRegisterInfo.h"
|
||||
|
@ -34,7 +33,6 @@
|
|||
#include "AMDGPUGenRegisterBankInfo.def"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace MIPatternMatch;
|
||||
|
||||
namespace {
|
||||
|
||||
|
@ -86,11 +84,9 @@ public:
|
|||
};
|
||||
|
||||
}
|
||||
AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const GCNSubtarget &ST)
|
||||
AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
|
||||
: AMDGPUGenRegisterBankInfo(),
|
||||
Subtarget(ST),
|
||||
TRI(Subtarget.getRegisterInfo()),
|
||||
TII(Subtarget.getInstrInfo()) {
|
||||
TRI(static_cast<const SIRegisterInfo*>(&TRI)) {
|
||||
|
||||
// HACK: Until this is fully tablegen'd.
|
||||
static bool AlreadyInit = false;
|
||||
|
@ -642,10 +638,8 @@ static LLT getHalfSizedType(LLT Ty) {
|
|||
///
|
||||
/// There is additional complexity to try for compare values to identify the
|
||||
/// unique values used.
|
||||
bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
|
||||
MachineIRBuilder &B,
|
||||
MachineInstr &MI,
|
||||
MachineRegisterInfo &MRI,
|
||||
void AMDGPURegisterBankInfo::executeInWaterfallLoop(
|
||||
MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||
ArrayRef<unsigned> OpIndices) const {
|
||||
MachineFunction *MF = MI.getParent()->getParent();
|
||||
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
|
||||
|
@ -668,8 +662,9 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
|
|||
|
||||
// No operands need to be replaced, so no need to loop.
|
||||
if (SGPROperandRegs.empty())
|
||||
return false;
|
||||
return;
|
||||
|
||||
MachineIRBuilder B(MI);
|
||||
SmallVector<Register, 4> ResultRegs;
|
||||
SmallVector<Register, 4> InitResultRegs;
|
||||
SmallVector<Register, 4> PhiRegs;
|
||||
|
@ -927,18 +922,6 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
|
|||
B.buildInstr(AMDGPU::S_MOV_B64_term)
|
||||
.addDef(AMDGPU::EXEC)
|
||||
.addReg(SaveExecReg);
|
||||
|
||||
// Restore the insert point before the original instruction.
|
||||
B.setInsertPt(MBB, MBB.end());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
|
||||
MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||
ArrayRef<unsigned> OpIndices) const {
|
||||
MachineIRBuilder B(MI);
|
||||
return executeInWaterfallLoop(B, MI, MRI, OpIndices);
|
||||
}
|
||||
|
||||
// Legalize an operand that must be an SGPR by inserting a readfirstlane.
|
||||
|
@ -1048,33 +1031,6 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI,
|
|||
return true;
|
||||
}
|
||||
|
||||
bool AMDGPURegisterBankInfo::applyMappingImage(
|
||||
MachineInstr &MI, const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
|
||||
MachineRegisterInfo &MRI, int RsrcIdx) const {
|
||||
const int NumDefs = MI.getNumExplicitDefs();
|
||||
|
||||
// The reported argument index is relative to the IR intrinsic call arguments,
|
||||
// so we need to shift by the number of defs and the intrinsic ID.
|
||||
RsrcIdx += NumDefs + 1;
|
||||
|
||||
// Insert copies to VGPR arguments.
|
||||
applyDefaultMapping(OpdMapper);
|
||||
|
||||
// Fixup any SGPR arguments.
|
||||
SmallVector<unsigned, 4> SGPRIndexes;
|
||||
for (int I = NumDefs, NumOps = MI.getNumOperands(); I != NumOps; ++I) {
|
||||
if (!MI.getOperand(I).isReg())
|
||||
continue;
|
||||
|
||||
// If this intrinsic has a sampler, it immediately follows rsrc.
|
||||
if (I == RsrcIdx || I == RsrcIdx + 1)
|
||||
SGPRIndexes.push_back(I);
|
||||
}
|
||||
|
||||
executeInWaterfallLoop(MI, MRI, SGPRIndexes);
|
||||
return true;
|
||||
}
|
||||
|
||||
// For cases where only a single copy is inserted for matching register banks.
|
||||
// Replace the register in the instruction operand
|
||||
static void substituteSimpleCopyRegs(
|
||||
|
@ -1086,184 +1042,6 @@ static void substituteSimpleCopyRegs(
|
|||
}
|
||||
}
|
||||
|
||||
/// Handle register layout difference for f16 images for some subtargets.
|
||||
Register AMDGPURegisterBankInfo::handleD16VData(MachineIRBuilder &B,
|
||||
MachineRegisterInfo &MRI,
|
||||
Register Reg) const {
|
||||
if (!Subtarget.hasUnpackedD16VMem())
|
||||
return Reg;
|
||||
|
||||
const LLT S16 = LLT::scalar(16);
|
||||
LLT StoreVT = MRI.getType(Reg);
|
||||
if (!StoreVT.isVector() || StoreVT.getElementType() != S16)
|
||||
return Reg;
|
||||
|
||||
auto Unmerge = B.buildUnmerge(S16, Reg);
|
||||
|
||||
|
||||
SmallVector<Register, 4> WideRegs;
|
||||
for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
|
||||
WideRegs.push_back(Unmerge.getReg(I));
|
||||
|
||||
const LLT S32 = LLT::scalar(32);
|
||||
int NumElts = StoreVT.getNumElements();
|
||||
|
||||
return B.buildMerge(LLT::vector(NumElts, S32), WideRegs).getReg(0);
|
||||
}
|
||||
|
||||
static std::pair<Register, unsigned>
|
||||
getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
|
||||
int64_t Const;
|
||||
if (mi_match(Reg, MRI, m_ICst(Const)))
|
||||
return std::make_pair(Register(), Const);
|
||||
|
||||
Register Base;
|
||||
if (mi_match(Reg, MRI, m_GAdd(m_Reg(Base), m_ICst(Const))))
|
||||
return std::make_pair(Base, Const);
|
||||
|
||||
// TODO: Handle G_OR used for add case
|
||||
return std::make_pair(Reg, 0);
|
||||
}
|
||||
|
||||
std::pair<Register, unsigned>
|
||||
AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
|
||||
Register OrigOffset) const {
|
||||
const unsigned MaxImm = 4095;
|
||||
Register BaseReg;
|
||||
unsigned ImmOffset;
|
||||
const LLT S32 = LLT::scalar(32);
|
||||
|
||||
std::tie(BaseReg, ImmOffset) = getBaseWithConstantOffset(*B.getMRI(),
|
||||
OrigOffset);
|
||||
|
||||
unsigned C1 = 0;
|
||||
if (ImmOffset != 0) {
|
||||
// If the immediate value is too big for the immoffset field, put the value
|
||||
// and -4096 into the immoffset field so that the value that is copied/added
|
||||
// for the voffset field is a multiple of 4096, and it stands more chance
|
||||
// of being CSEd with the copy/add for another similar load/store.
|
||||
// However, do not do that rounding down to a multiple of 4096 if that is a
|
||||
// negative number, as it appears to be illegal to have a negative offset
|
||||
// in the vgpr, even if adding the immediate offset makes it positive.
|
||||
unsigned Overflow = ImmOffset & ~MaxImm;
|
||||
ImmOffset -= Overflow;
|
||||
if ((int32_t)Overflow < 0) {
|
||||
Overflow += ImmOffset;
|
||||
ImmOffset = 0;
|
||||
}
|
||||
|
||||
C1 = ImmOffset;
|
||||
if (Overflow != 0) {
|
||||
if (!BaseReg)
|
||||
BaseReg = B.buildConstant(S32, Overflow).getReg(0);
|
||||
else {
|
||||
auto OverflowVal = B.buildConstant(S32, Overflow);
|
||||
BaseReg = B.buildAdd(S32, BaseReg, OverflowVal).getReg(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!BaseReg)
|
||||
BaseReg = B.buildConstant(S32, 0).getReg(0);
|
||||
|
||||
return {BaseReg, C1};
|
||||
}
|
||||
|
||||
/// Return true if \p Reg matches an integer constant whose value is 0.
static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
  int64_t Value;
  if (!mi_match(Reg, MRI, m_ICst(Value)))
    return false;
  return Value == 0;
}
|
||||
|
||||
/// Return bit 0 of \p CachePolicy (0 or 1).
static unsigned extractGLC(unsigned CachePolicy) {
  const unsigned BitIndex = 0;
  return (CachePolicy >> BitIndex) & 1u;
}
|
||||
|
||||
/// Return bit 1 of \p CachePolicy (0 or 1).
static unsigned extractSLC(unsigned CachePolicy) {
  const unsigned BitIndex = 1;
  const unsigned Shifted = CachePolicy >> BitIndex;
  return Shifted & 1u;
}
|
||||
|
||||
/// Return bit 2 of \p CachePolicy (0 or 1).
static unsigned extractDLC(unsigned CachePolicy) {
  const unsigned BitIndex = 2;
  const unsigned Shifted = CachePolicy >> BitIndex;
  return Shifted & 1u;
}
|
||||
|
||||
/// Build a BUFFER_STORE_* "_exact" instruction from the store intrinsic \p MI.
///
/// Operand 1 of \p MI is read as the stored data, operands 2-4 as rsrc,
/// voffset and soffset registers, and operand 5 as the cache policy
/// immediate. The voffset is split into a register and an immediate part via
/// splitBufferOffsets(); the OFFEN form of the opcode is used unless the
/// remaining register part is a constant zero. The opcode width follows the
/// size of the first memory operand (byte, short, or dword, widened by the
/// number of dwords in the data type).
///
/// Reports a fatal error if the data's scalar size is not 32 bits, or if the
/// register operands of the new instruction cannot be constrained.
///
/// \returns the newly built instruction. \p MI itself is not erased here.
MachineInstr *
AMDGPURegisterBankInfo::selectStoreIntrinsic(MachineIRBuilder &B,
                                             MachineInstr &MI) const {
  MachineRegisterInfo &MRI = *B.getMRI();
  executeInWaterfallLoop(B, MI, MRI, {2, 4});

  // FIXME: DAG lowering brokenly changes opcode based on FP vs. integer.

  const Register VData = MI.getOperand(1).getReg();
  const LLT DataTy = MRI.getType(VData);
  const int EltBits = DataTy.getScalarSizeInBits();
  const int TotalBits = DataTy.getSizeInBits();

  // FIXME: Broken integer truncstore.
  if (EltBits != 32)
    report_fatal_error("unhandled intrinsic store");

  // FIXME: Verifier should enforce 1 MMO for these intrinsics.
  const int MemSize = (*MI.memoperands_begin())->getSize();
  const int MemBits = 8 * MemSize;

  const Register RSrc = MI.getOperand(2).getReg();
  Register VOffset = MI.getOperand(3).getReg();
  const Register SOffset = MI.getOperand(4).getReg();
  const unsigned CachePolicy = MI.getOperand(5).getImm();

  // Peel off whatever part of the offset fits in the immediate field.
  unsigned ImmOffset;
  std::tie(VOffset, ImmOffset) = splitBufferOffsets(B, VOffset);

  // A register offset operand is only needed when it is not known to be zero.
  const bool Offen = !isZero(VOffset, MRI);

  unsigned Opc;
  if (MemBits == 8) {
    Opc = Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact
                : AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact;
  } else if (MemBits == 16) {
    Opc = Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact
                : AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact;
  } else {
    Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact
                : AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact;
    // Multi-dword data selects the matching wider variant of the opcode.
    if (TotalBits > 32)
      Opc = AMDGPU::getMUBUFOpcode(Opc, TotalBits / 32);
  }

  // Set the insertion point back to the instruction in case it was moved into a
  // loop.
  B.setInstr(MI);

  MachineInstrBuilder MIB = B.buildInstr(Opc);
  MIB.addUse(VData);

  if (Offen)
    MIB.addUse(VOffset);

  MIB.addUse(RSrc);
  MIB.addUse(SOffset);
  MIB.addImm(ImmOffset);
  MIB.addImm(extractGLC(CachePolicy));
  MIB.addImm(extractSLC(CachePolicy));
  MIB.addImm(0); // tfe: FIXME: Remove from inst
  MIB.addImm(extractDLC(CachePolicy));
  MIB.cloneMemRefs(MI);

  // FIXME: We need a way to report failure from applyMappingImpl.
  // Insert constrain copies before inserting the loop.
  // NOTE(review): the waterfall loop is requested at the top of this function,
  // before the store is built - confirm the intended ordering.
  if (!constrainSelectedInstRegOperands(*MIB, *TII, *TRI, *this))
    report_fatal_error("failed to constrain selected store intrinsic");

  return MIB;
}
|
||||
|
||||
void AMDGPURegisterBankInfo::applyMappingImpl(
|
||||
const OperandsMapper &OpdMapper) const {
|
||||
MachineInstr &MI = OpdMapper.getMI();
|
||||
|
@ -1627,8 +1405,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
|
|||
break;
|
||||
}
|
||||
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
|
||||
auto IntrID = MI.getIntrinsicID();
|
||||
switch (IntrID) {
|
||||
switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
|
||||
case Intrinsic::amdgcn_buffer_load: {
|
||||
executeInWaterfallLoop(MI, MRI, { 2 });
|
||||
return;
|
||||
|
@ -1647,39 +1424,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
|
|||
constrainOpWithReadfirstlane(MI, MRI, 2); // M0
|
||||
return;
|
||||
}
|
||||
case Intrinsic::amdgcn_raw_buffer_load:
|
||||
case Intrinsic::amdgcn_raw_buffer_load_format:
|
||||
case Intrinsic::amdgcn_raw_tbuffer_load:
|
||||
case Intrinsic::amdgcn_raw_buffer_store:
|
||||
case Intrinsic::amdgcn_raw_buffer_store_format:
|
||||
case Intrinsic::amdgcn_raw_tbuffer_store: {
|
||||
applyDefaultMapping(OpdMapper);
|
||||
executeInWaterfallLoop(MI, MRI, {2, 4});
|
||||
return;
|
||||
}
|
||||
case Intrinsic::amdgcn_struct_buffer_load:
|
||||
case Intrinsic::amdgcn_struct_buffer_store:
|
||||
case Intrinsic::amdgcn_struct_tbuffer_load:
|
||||
case Intrinsic::amdgcn_struct_tbuffer_store: {
|
||||
applyDefaultMapping(OpdMapper);
|
||||
executeInWaterfallLoop(MI, MRI, {2, 5});
|
||||
return;
|
||||
}
|
||||
default: {
|
||||
if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
|
||||
AMDGPU::lookupRsrcIntrinsic(IntrID)) {
|
||||
// Non-images can have complications from operands that allow both SGPR
|
||||
// and VGPR. For now it's too complicated to figure out the final opcode
|
||||
// to derive the register bank from the MCInstrDesc.
|
||||
if (RSrcIntrin->IsImage) {
|
||||
applyMappingImage(MI, OpdMapper, MRI, RSrcIntrin->RsrcArg);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case AMDGPU::G_LOAD:
|
||||
|
@ -1784,45 +1531,6 @@ AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const {
|
|||
MI.getNumOperands());
|
||||
}
|
||||
|
||||
/// Compute the instruction mapping for an image intrinsic \p MI.
///
/// \p RsrcIdx is the index of the resource argument relative to the IR
/// intrinsic call arguments. The operand at that position and the one right
/// after it are reported with whatever bank they currently have (SGPR when
/// no bank is assigned); every other register operand is mapped to VGPR.
/// Non-register operands are left without a mapping.
const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getImageMapping(const MachineRegisterInfo &MRI,
                                        const MachineInstr &MI,
                                        int RsrcIdx) const {
  // The reported argument index is relative to the IR intrinsic call arguments,
  // so we need to shift by the number of defs and the intrinsic ID.
  const int RsrcOpIdx = RsrcIdx + MI.getNumExplicitDefs() + 1;

  const int NumOps = MI.getNumOperands();
  SmallVector<const ValueMapping *, 8> OpdsMapping(NumOps);

  // TODO: Should packed/unpacked D16 difference be reported here as part of
  // the value mapping?
  for (int Idx = 0; Idx < NumOps; ++Idx) {
    const auto &Op = MI.getOperand(Idx);
    if (!Op.isReg())
      continue;

    const Register OpReg = Op.getReg();
    const unsigned Size = getSizeInBits(OpReg, MRI, *TRI);

    // FIXME: Probably need a new intrinsic register bank searchable table to
    // handle arbitrary intrinsics easily.
    //
    // If this has a sampler, it immediately follows rsrc.
    const bool MustBeSGPR = (Idx == RsrcOpIdx) || (Idx == RsrcOpIdx + 1);

    // Operands that must be SGPRs are reported as legal with whatever bank
    // they already have. The remaining operands must be VGPRs, and those are
    // easy to copy to.
    const unsigned BankID =
        MustBeSGPR ? getRegBankID(OpReg, MRI, *TRI, AMDGPU::SGPRRegBankID)
                   : static_cast<unsigned>(AMDGPU::VGPRRegBankID);

    OpdsMapping[Idx] = AMDGPU::getValueMapping(BankID, Size);
  }

  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), NumOps);
}
|
||||
|
||||
const RegisterBankInfo::InstructionMapping &
|
||||
AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
|
||||
|
||||
|
@ -1869,31 +1577,11 @@ AMDGPURegisterBankInfo::getRegBankID(Register Reg,
|
|||
return Bank ? Bank->getID() : Default;
|
||||
}
|
||||
|
||||
|
||||
static unsigned regBankUnion(unsigned RB0, unsigned RB1) {
|
||||
return (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID) ?
|
||||
AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
|
||||
}
|
||||
|
||||
/// Return a value mapping for \p Reg using the bank it currently has, falling
/// back to the SGPR bank when getRegBankID() finds none.
const RegisterBankInfo::ValueMapping *
AMDGPURegisterBankInfo::getSGPROpMapping(Register Reg,
                                         const MachineRegisterInfo &MRI,
                                         const TargetRegisterInfo &TRI) const {
  // Lie and claim anything is legal, even though this needs to be an SGPR;
  // applyMapping will have to deal with it as a waterfall loop.
  const unsigned CurrentBank =
      getRegBankID(Reg, MRI, TRI, AMDGPU::SGPRRegBankID);
  const unsigned SizeInBits = getSizeInBits(Reg, MRI, TRI);
  return AMDGPU::getValueMapping(CurrentBank, SizeInBits);
}
|
||||
|
||||
/// Return a VGPR-bank value mapping sized to match \p Reg.
const RegisterBankInfo::ValueMapping *
AMDGPURegisterBankInfo::getVGPROpMapping(Register Reg,
                                         const MachineRegisterInfo &MRI,
                                         const TargetRegisterInfo &TRI) const {
  return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
                                 getSizeInBits(Reg, MRI, TRI));
}
|
||||
|
||||
///
|
||||
/// This function must return a legal mapping, because
|
||||
/// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called
|
||||
|
@ -2060,6 +1748,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
|||
|
||||
LLVM_FALLTHROUGH;
|
||||
}
|
||||
|
||||
case AMDGPU::G_GEP:
|
||||
case AMDGPU::G_ADD:
|
||||
case AMDGPU::G_SUB:
|
||||
|
@ -2075,6 +1764,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
|||
case AMDGPU::G_SADDE:
|
||||
case AMDGPU::G_USUBE:
|
||||
case AMDGPU::G_SSUBE:
|
||||
case AMDGPU::G_UMULH:
|
||||
case AMDGPU::G_SMULH:
|
||||
case AMDGPU::G_SMIN:
|
||||
case AMDGPU::G_SMAX:
|
||||
case AMDGPU::G_UMIN:
|
||||
|
@ -2108,13 +1799,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
|||
case AMDGPU::G_INTRINSIC_TRUNC:
|
||||
case AMDGPU::G_INTRINSIC_ROUND:
|
||||
return getDefaultMappingVOP(MI);
|
||||
case AMDGPU::G_UMULH:
|
||||
case AMDGPU::G_SMULH: {
|
||||
if (MF.getSubtarget<GCNSubtarget>().hasScalarMulHiInsts() &&
|
||||
isSALUMapping(MI))
|
||||
return getDefaultMappingSOP(MI);
|
||||
return getDefaultMappingVOP(MI);
|
||||
}
|
||||
case AMDGPU::G_IMPLICIT_DEF: {
|
||||
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
|
||||
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
|
||||
|
@ -2388,7 +2072,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
|||
case Intrinsic::amdgcn_wwm:
|
||||
case Intrinsic::amdgcn_wqm:
|
||||
return getDefaultMappingVOP(MI);
|
||||
case Intrinsic::amdgcn_ds_swizzle:
|
||||
case Intrinsic::amdgcn_ds_permute:
|
||||
case Intrinsic::amdgcn_ds_bpermute:
|
||||
case Intrinsic::amdgcn_update_dpp:
|
||||
|
@ -2510,8 +2193,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
|||
break;
|
||||
}
|
||||
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
|
||||
auto IntrID = MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID();
|
||||
switch (IntrID) {
|
||||
switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
|
||||
default:
|
||||
return getInvalidInstructionMapping();
|
||||
case Intrinsic::amdgcn_s_getreg:
|
||||
case Intrinsic::amdgcn_s_memtime:
|
||||
case Intrinsic::amdgcn_s_memrealtime:
|
||||
|
@ -2551,11 +2235,18 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
|||
OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
|
||||
break;
|
||||
case Intrinsic::amdgcn_exp:
|
||||
OpdsMapping[0] = nullptr; // IntrinsicID
|
||||
// FIXME: These are immediate values which can't be read from registers.
|
||||
OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
|
||||
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
|
||||
// FIXME: Could we support packed types here?
|
||||
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
|
||||
OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
|
||||
OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
|
||||
OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
|
||||
// FIXME: These are immediate values which can't be read from registers.
|
||||
OpdsMapping[7] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
|
||||
OpdsMapping[8] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
|
||||
break;
|
||||
case Intrinsic::amdgcn_buffer_load: {
|
||||
Register RSrc = MI.getOperand(2).getReg(); // SGPR
|
||||
|
@ -2607,54 +2298,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
|||
OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
|
||||
break;
|
||||
}
|
||||
case Intrinsic::amdgcn_raw_buffer_load:
|
||||
case Intrinsic::amdgcn_raw_tbuffer_load: {
|
||||
// FIXME: Should make intrinsic ID the last operand of the instruction,
|
||||
// then this would be the same as store
|
||||
OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
|
||||
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
|
||||
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
|
||||
OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
|
||||
break;
|
||||
}
|
||||
case Intrinsic::amdgcn_raw_buffer_store:
|
||||
case Intrinsic::amdgcn_raw_buffer_store_format:
|
||||
case Intrinsic::amdgcn_raw_tbuffer_store: {
|
||||
OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
|
||||
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
|
||||
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
|
||||
OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
|
||||
break;
|
||||
}
|
||||
case Intrinsic::amdgcn_struct_buffer_load:
|
||||
case Intrinsic::amdgcn_struct_tbuffer_load: {
|
||||
OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
|
||||
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
|
||||
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
|
||||
OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
|
||||
OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
|
||||
break;
|
||||
}
|
||||
case Intrinsic::amdgcn_struct_buffer_store:
|
||||
case Intrinsic::amdgcn_struct_tbuffer_store: {
|
||||
OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
|
||||
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
|
||||
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
|
||||
OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
|
||||
OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
|
||||
AMDGPU::lookupRsrcIntrinsic(IntrID)) {
|
||||
// Non-images can have complications from operands that allow both SGPR
|
||||
// and VGPR. For now it's too complicated to figure out the final opcode
|
||||
// to derive the register bank from the MCInstrDesc.
|
||||
if (RSrcIntrin->IsImage)
|
||||
return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg);
|
||||
}
|
||||
|
||||
return getInvalidInstructionMapping();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -23,9 +23,7 @@
|
|||
namespace llvm {
|
||||
|
||||
class LLT;
|
||||
class GCNSubtarget;
|
||||
class MachineIRBuilder;
|
||||
class SIInstrInfo;
|
||||
class SIRegisterInfo;
|
||||
class TargetRegisterInfo;
|
||||
|
||||
|
@ -38,15 +36,9 @@ protected:
|
|||
#include "AMDGPUGenRegisterBank.inc"
|
||||
};
|
||||
class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
|
||||
const GCNSubtarget &Subtarget;
|
||||
const SIRegisterInfo *TRI;
|
||||
const SIInstrInfo *TII;
|
||||
|
||||
bool executeInWaterfallLoop(MachineIRBuilder &B,
|
||||
MachineInstr &MI,
|
||||
MachineRegisterInfo &MRI,
|
||||
ArrayRef<unsigned> OpIndices) const;
|
||||
bool executeInWaterfallLoop(MachineInstr &MI,
|
||||
void executeInWaterfallLoop(MachineInstr &MI,
|
||||
MachineRegisterInfo &MRI,
|
||||
ArrayRef<unsigned> OpIndices) const;
|
||||
|
||||
|
@ -55,19 +47,6 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
|
|||
bool applyMappingWideLoad(MachineInstr &MI,
|
||||
const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
|
||||
MachineRegisterInfo &MRI) const;
|
||||
bool
|
||||
applyMappingImage(MachineInstr &MI,
|
||||
const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
|
||||
MachineRegisterInfo &MRI, int RSrcIdx) const;
|
||||
|
||||
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
|
||||
Register Reg) const;
|
||||
|
||||
std::pair<Register, unsigned>
|
||||
splitBufferOffsets(MachineIRBuilder &B, Register Offset) const;
|
||||
|
||||
MachineInstr *selectStoreIntrinsic(MachineIRBuilder &B,
|
||||
MachineInstr &MI) const;
|
||||
|
||||
/// See RegisterBankInfo::applyMapping.
|
||||
void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
|
||||
|
@ -79,16 +58,6 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
|
|||
const TargetRegisterInfo &TRI,
|
||||
unsigned Default = AMDGPU::VGPRRegBankID) const;
|
||||
|
||||
// Return a value mapping for an operand that is required to be an SGPR.
|
||||
const ValueMapping *getSGPROpMapping(Register Reg,
|
||||
const MachineRegisterInfo &MRI,
|
||||
const TargetRegisterInfo &TRI) const;
|
||||
|
||||
// Return a value mapping for an operand that is required to be a VGPR.
|
||||
const ValueMapping *getVGPROpMapping(Register Reg,
|
||||
const MachineRegisterInfo &MRI,
|
||||
const TargetRegisterInfo &TRI) const;
|
||||
|
||||
/// Split 64-bit value \p Reg into two 32-bit halves and populate them into \p
|
||||
/// Regs. This appropriately sets the regbank of the new registers.
|
||||
void split64BitValueForMapping(MachineIRBuilder &B,
|
||||
|
@ -121,13 +90,8 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
|
|||
const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const;
|
||||
const InstructionMapping &getDefaultMappingAllVGPR(
|
||||
const MachineInstr &MI) const;
|
||||
|
||||
const InstructionMapping &getImageMapping(const MachineRegisterInfo &MRI,
|
||||
const MachineInstr &MI,
|
||||
int RsrcIdx) const;
|
||||
|
||||
public:
|
||||
AMDGPURegisterBankInfo(const GCNSubtarget &STI);
|
||||
AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI);
|
||||
|
||||
unsigned copyCost(const RegisterBank &A, const RegisterBank &B,
|
||||
unsigned Size) const override;
|
||||
|
|
|
@ -283,7 +283,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
|||
MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(this);
|
||||
CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
|
||||
Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
|
||||
RegBankInfo.reset(new AMDGPURegisterBankInfo(*this));
|
||||
RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
|
||||
InstSelector.reset(new AMDGPUInstructionSelector(
|
||||
*this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
|
||||
}
|
||||
|
|
|
@ -555,10 +555,6 @@ public:
|
|||
return GFX9Insts;
|
||||
}
|
||||
|
||||
bool hasScalarMulHiInsts() const {
|
||||
return GFX9Insts;
|
||||
}
|
||||
|
||||
TrapHandlerAbi getTrapHandlerAbi() const {
|
||||
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
|
||||
}
|
||||
|
|
|
@ -1135,29 +1135,29 @@ def extract_dlc : SDNodeXForm<imm, [{
|
|||
multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
|
||||
string opcode> {
|
||||
def : GCNPat<
|
||||
(vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, 0)),
|
||||
(vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
|
||||
imm:$cachepolicy, 0)),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
|
||||
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, 0)),
|
||||
(vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
|
||||
imm:$cachepolicy, 0)),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
|
||||
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, timm)),
|
||||
(vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
|
||||
imm:$cachepolicy, imm)),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
|
||||
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, timm)),
|
||||
(vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
|
||||
imm:$cachepolicy, imm)),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
|
||||
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
|
||||
$rsrc, $soffset, (as_i16imm $offset),
|
||||
|
@ -1210,31 +1210,31 @@ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ushort, i32, "BUFFER_LOAD_USHORT">;
|
|||
multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
|
||||
string opcode> {
|
||||
def : GCNPat<
|
||||
(name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, 0),
|
||||
(name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
|
||||
imm:$cachepolicy, 0),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset),
|
||||
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, 0),
|
||||
(name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
|
||||
imm:$cachepolicy, 0),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
|
||||
(as_i16imm $offset), (extract_glc $cachepolicy),
|
||||
(extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, timm),
|
||||
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
|
||||
imm:$cachepolicy, imm),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
|
||||
(as_i16imm $offset), (extract_glc $cachepolicy),
|
||||
(extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, timm),
|
||||
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
|
||||
imm:$cachepolicy, imm),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
|
||||
$vdata,
|
||||
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
|
||||
|
@ -1291,32 +1291,32 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
|
|||
string opcode> {
|
||||
def : GCNPat<
|
||||
(vt (name vt:$vdata_in, v4i32:$rsrc, 0,
|
||||
0, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, 0)),
|
||||
0, i32:$soffset, imm:$offset,
|
||||
imm:$cachepolicy, 0)),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN) $vdata_in, $rsrc, $soffset,
|
||||
(as_i16imm $offset), (extract_slc $cachepolicy))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
|
||||
0, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, timm)),
|
||||
0, i32:$soffset, imm:$offset,
|
||||
imm:$cachepolicy, imm)),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) $vdata_in, $vindex, $rsrc, $soffset,
|
||||
(as_i16imm $offset), (extract_slc $cachepolicy))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(vt (name vt:$vdata_in, v4i32:$rsrc, 0,
|
||||
i32:$voffset, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, 0)),
|
||||
i32:$voffset, i32:$soffset, imm:$offset,
|
||||
imm:$cachepolicy, 0)),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) $vdata_in, $voffset, $rsrc, $soffset,
|
||||
(as_i16imm $offset), (extract_slc $cachepolicy))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
|
||||
i32:$voffset, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, timm)),
|
||||
i32:$voffset, i32:$soffset, imm:$offset,
|
||||
imm:$cachepolicy, imm)),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_RTN)
|
||||
$vdata_in,
|
||||
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
|
||||
|
@ -1353,32 +1353,32 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
|
|||
string opcode> {
|
||||
def : GCNPat<
|
||||
(name vt:$vdata_in, v4i32:$rsrc, 0,
|
||||
0, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, 0),
|
||||
0, i32:$soffset, imm:$offset,
|
||||
imm:$cachepolicy, 0),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) $vdata_in, $rsrc, $soffset,
|
||||
(as_i16imm $offset), (extract_slc $cachepolicy))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
|
||||
0, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, timm),
|
||||
0, i32:$soffset, imm:$offset,
|
||||
imm:$cachepolicy, imm),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vdata_in, $vindex, $rsrc, $soffset,
|
||||
(as_i16imm $offset), (extract_slc $cachepolicy))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(name vt:$vdata_in, v4i32:$rsrc, 0,
|
||||
i32:$voffset, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, 0),
|
||||
i32:$voffset, i32:$soffset, imm:$offset,
|
||||
imm:$cachepolicy, 0),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) $vdata_in, $voffset, $rsrc, $soffset,
|
||||
(as_i16imm $offset), (extract_slc $cachepolicy))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
|
||||
i32:$voffset, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, timm),
|
||||
i32:$voffset, i32:$soffset, imm:$offset,
|
||||
imm:$cachepolicy, imm),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
|
||||
$vdata_in,
|
||||
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
|
||||
|
@ -1392,8 +1392,8 @@ defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_pk_fadd, v2f16, "BUFFER_ATOMI
|
|||
def : GCNPat<
|
||||
(SIbuffer_atomic_cmpswap
|
||||
i32:$data, i32:$cmp, v4i32:$rsrc, 0,
|
||||
0, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, 0),
|
||||
0, i32:$soffset, imm:$offset,
|
||||
imm:$cachepolicy, 0),
|
||||
(EXTRACT_SUBREG
|
||||
(BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN
|
||||
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
|
||||
|
@ -1404,8 +1404,8 @@ def : GCNPat<
|
|||
def : GCNPat<
|
||||
(SIbuffer_atomic_cmpswap
|
||||
i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
|
||||
0, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, timm),
|
||||
0, i32:$soffset, imm:$offset,
|
||||
imm:$cachepolicy, imm),
|
||||
(EXTRACT_SUBREG
|
||||
(BUFFER_ATOMIC_CMPSWAP_IDXEN_RTN
|
||||
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
|
||||
|
@ -1416,8 +1416,8 @@ def : GCNPat<
|
|||
def : GCNPat<
|
||||
(SIbuffer_atomic_cmpswap
|
||||
i32:$data, i32:$cmp, v4i32:$rsrc, 0,
|
||||
i32:$voffset, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, 0),
|
||||
i32:$voffset, i32:$soffset, imm:$offset,
|
||||
imm:$cachepolicy, 0),
|
||||
(EXTRACT_SUBREG
|
||||
(BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN
|
||||
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
|
||||
|
@ -1428,8 +1428,8 @@ def : GCNPat<
|
|||
def : GCNPat<
|
||||
(SIbuffer_atomic_cmpswap
|
||||
i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
|
||||
i32:$voffset, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, timm),
|
||||
i32:$voffset, i32:$soffset, imm:$offset,
|
||||
imm:$cachepolicy, imm),
|
||||
(EXTRACT_SUBREG
|
||||
(BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN
|
||||
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
|
||||
|
@ -1642,32 +1642,32 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_D16_HI_OFFEN, BUFFER_STORE_BYTE_D
|
|||
multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
|
||||
string opcode> {
|
||||
def : GCNPat<
|
||||
(vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
|
||||
timm:$format, timm:$cachepolicy, 0)),
|
||||
(vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
|
||||
imm:$format, imm:$cachepolicy, 0)),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
|
||||
(as_i8imm $format),
|
||||
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
|
||||
timm:$format, timm:$cachepolicy, timm)),
|
||||
(vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
|
||||
imm:$format, imm:$cachepolicy, imm)),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
|
||||
(as_i8imm $format),
|
||||
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
|
||||
timm:$format, timm:$cachepolicy, 0)),
|
||||
(vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
|
||||
imm:$format, imm:$cachepolicy, 0)),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
|
||||
(as_i8imm $format),
|
||||
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
|
||||
timm:$format, timm:$cachepolicy, timm)),
|
||||
(vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
|
||||
imm:$format, imm:$cachepolicy, imm)),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN)
|
||||
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
|
||||
$rsrc, $soffset, (as_i16imm $offset),
|
||||
|
@ -1700,24 +1700,24 @@ let SubtargetPredicate = HasPackedD16VMem in {
|
|||
multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
|
||||
string opcode> {
|
||||
def : GCNPat<
|
||||
(name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
|
||||
timm:$format, timm:$cachepolicy, 0),
|
||||
(name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
|
||||
imm:$format, imm:$cachepolicy, 0),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset,
|
||||
(as_i16imm $offset), (as_i8imm $format),
|
||||
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
|
||||
timm:$format, timm:$cachepolicy, timm),
|
||||
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
|
||||
imm:$format, imm:$cachepolicy, imm),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
|
||||
(as_i16imm $offset), (as_i8imm $format),
|
||||
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
|
||||
timm:$format, timm:$cachepolicy, 0),
|
||||
(name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
|
||||
imm:$format, imm:$cachepolicy, 0),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
|
||||
(as_i16imm $offset), (as_i8imm $format),
|
||||
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
|
@ -1725,7 +1725,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
|
|||
|
||||
def : GCNPat<
|
||||
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
|
||||
timm:$offset, timm:$format, timm:$cachepolicy, timm),
|
||||
imm:$offset, imm:$format, imm:$cachepolicy, imm),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact)
|
||||
$vdata,
|
||||
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
|
||||
|
|
|
@ -603,7 +603,7 @@ def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def : GCNPat <
|
||||
(int_amdgcn_ds_swizzle i32:$src, timm:$offset16),
|
||||
(int_amdgcn_ds_swizzle i32:$src, imm:$offset16),
|
||||
(DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0))
|
||||
>;
|
||||
|
||||
|
|
|
@ -5666,14 +5666,14 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
|
|||
SDVTList VTList = DAG.getVTList({LoadVT, MVT::Glue});
|
||||
unsigned CachePolicy = cast<ConstantSDNode>(GLC)->getZExtValue();
|
||||
SDValue Ops[] = {
|
||||
DAG.getEntryNode(), // Chain
|
||||
Rsrc, // rsrc
|
||||
DAG.getConstant(0, DL, MVT::i32), // vindex
|
||||
{}, // voffset
|
||||
{}, // soffset
|
||||
{}, // offset
|
||||
DAG.getTargetConstant(CachePolicy, DL, MVT::i32), // cachepolicy
|
||||
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
|
||||
DAG.getEntryNode(), // Chain
|
||||
Rsrc, // rsrc
|
||||
DAG.getConstant(0, DL, MVT::i32), // vindex
|
||||
{}, // voffset
|
||||
{}, // soffset
|
||||
{}, // offset
|
||||
DAG.getConstant(CachePolicy, DL, MVT::i32), // cachepolicy
|
||||
DAG.getConstant(0, DL, MVT::i1), // idxen
|
||||
};
|
||||
|
||||
// Use the alignment to ensure that the required offsets will fit into the
|
||||
|
@ -5682,7 +5682,7 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
|
|||
|
||||
uint64_t InstOffset = cast<ConstantSDNode>(Ops[5])->getZExtValue();
|
||||
for (unsigned i = 0; i < NumLoads; ++i) {
|
||||
Ops[5] = DAG.getTargetConstant(InstOffset + 16 * i, DL, MVT::i32);
|
||||
Ops[5] = DAG.getConstant(InstOffset + 16 * i, DL, MVT::i32);
|
||||
Loads.push_back(DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD, DL, VTList,
|
||||
Ops, LoadVT, MMO));
|
||||
}
|
||||
|
@ -5894,12 +5894,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
Op.getOperand(1), // Src0
|
||||
Op.getOperand(2), // Attrchan
|
||||
Op.getOperand(3), // Attr
|
||||
DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
|
||||
DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
|
||||
S, // Src2 - holds two f16 values selected by high
|
||||
DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
|
||||
DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers
|
||||
Op.getOperand(4), // high
|
||||
DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
|
||||
DAG.getTargetConstant(0, DL, MVT::i32) // $omod
|
||||
DAG.getConstant(0, DL, MVT::i1), // $clamp
|
||||
DAG.getConstant(0, DL, MVT::i32) // $omod
|
||||
};
|
||||
return DAG.getNode(AMDGPUISD::INTERP_P1LV_F16, DL, MVT::f32, Ops);
|
||||
} else {
|
||||
|
@ -5908,10 +5908,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
Op.getOperand(1), // Src0
|
||||
Op.getOperand(2), // Attrchan
|
||||
Op.getOperand(3), // Attr
|
||||
DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
|
||||
DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
|
||||
Op.getOperand(4), // high
|
||||
DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
|
||||
DAG.getTargetConstant(0, DL, MVT::i32), // $omod
|
||||
DAG.getConstant(0, DL, MVT::i1), // $clamp
|
||||
DAG.getConstant(0, DL, MVT::i32), // $omod
|
||||
Glue
|
||||
};
|
||||
return DAG.getNode(AMDGPUISD::INTERP_P1LL_F16, DL, MVT::f32, Ops);
|
||||
|
@ -5924,11 +5924,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
Op.getOperand(2), // Src0
|
||||
Op.getOperand(3), // Attrchan
|
||||
Op.getOperand(4), // Attr
|
||||
DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
|
||||
DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
|
||||
Op.getOperand(1), // Src2
|
||||
DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
|
||||
DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers
|
||||
Op.getOperand(5), // high
|
||||
DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
|
||||
DAG.getConstant(0, DL, MVT::i1), // $clamp
|
||||
Glue
|
||||
};
|
||||
return DAG.getNode(AMDGPUISD::INTERP_P2_F16, DL, MVT::f16, Ops);
|
||||
|
@ -6234,8 +6234,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
|||
SDValue(), // voffset -- will be set by setBufferOffsets
|
||||
SDValue(), // soffset -- will be set by setBufferOffsets
|
||||
SDValue(), // offset -- will be set by setBufferOffsets
|
||||
DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
|
||||
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
|
||||
DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
|
||||
DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
|
||||
};
|
||||
|
||||
setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]);
|
||||
|
@ -6272,7 +6272,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
|||
Op.getOperand(4), // soffset
|
||||
Offsets.second, // offset
|
||||
Op.getOperand(5), // cachepolicy
|
||||
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
|
||||
DAG.getConstant(0, DL, MVT::i1), // idxen
|
||||
};
|
||||
|
||||
return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops);
|
||||
|
@ -6290,7 +6290,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
|||
Op.getOperand(5), // soffset
|
||||
Offsets.second, // offset
|
||||
Op.getOperand(6), // cachepolicy
|
||||
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
|
||||
DAG.getConstant(1, DL, MVT::i1), // idxen
|
||||
};
|
||||
|
||||
return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops);
|
||||
|
@ -6313,9 +6313,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
|||
Op.getOperand(4), // voffset
|
||||
Op.getOperand(5), // soffset
|
||||
Op.getOperand(6), // offset
|
||||
DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
|
||||
DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
|
||||
DAG.getTargetConstant(IdxEn, DL, MVT::i1) // idxen
|
||||
DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
|
||||
DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
|
||||
DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
|
||||
};
|
||||
|
||||
if (LoadVT.getScalarType() == MVT::f16)
|
||||
|
@ -6339,7 +6339,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
|||
Offsets.second, // offset
|
||||
Op.getOperand(5), // format
|
||||
Op.getOperand(6), // cachepolicy
|
||||
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
|
||||
DAG.getConstant(0, DL, MVT::i1), // idxen
|
||||
};
|
||||
|
||||
if (LoadVT.getScalarType() == MVT::f16)
|
||||
|
@ -6363,7 +6363,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
|||
Offsets.second, // offset
|
||||
Op.getOperand(6), // format
|
||||
Op.getOperand(7), // cachepolicy
|
||||
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
|
||||
DAG.getConstant(1, DL, MVT::i1), // idxen
|
||||
};
|
||||
|
||||
if (LoadVT.getScalarType() == MVT::f16)
|
||||
|
@ -6395,8 +6395,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
|||
SDValue(), // voffset -- will be set by setBufferOffsets
|
||||
SDValue(), // soffset -- will be set by setBufferOffsets
|
||||
SDValue(), // offset -- will be set by setBufferOffsets
|
||||
DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
|
||||
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
|
||||
DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
|
||||
DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
|
||||
};
|
||||
setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
|
||||
EVT VT = Op.getValueType();
|
||||
|
@ -6464,7 +6464,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
|||
Op.getOperand(5), // soffset
|
||||
Offsets.second, // offset
|
||||
Op.getOperand(6), // cachepolicy
|
||||
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
|
||||
DAG.getConstant(0, DL, MVT::i1), // idxen
|
||||
};
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
|
@ -6537,7 +6537,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
|||
Op.getOperand(6), // soffset
|
||||
Offsets.second, // offset
|
||||
Op.getOperand(7), // cachepolicy
|
||||
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
|
||||
DAG.getConstant(1, DL, MVT::i1), // idxen
|
||||
};
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
|
@ -6602,8 +6602,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
|||
SDValue(), // voffset -- will be set by setBufferOffsets
|
||||
SDValue(), // soffset -- will be set by setBufferOffsets
|
||||
SDValue(), // offset -- will be set by setBufferOffsets
|
||||
DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
|
||||
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
|
||||
DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
|
||||
DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
|
||||
};
|
||||
setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]);
|
||||
EVT VT = Op.getValueType();
|
||||
|
@ -6624,7 +6624,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
|||
Op.getOperand(6), // soffset
|
||||
Offsets.second, // offset
|
||||
Op.getOperand(7), // cachepolicy
|
||||
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
|
||||
DAG.getConstant(0, DL, MVT::i1), // idxen
|
||||
};
|
||||
EVT VT = Op.getValueType();
|
||||
auto *M = cast<MemSDNode>(Op);
|
||||
|
@ -6644,7 +6644,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
|||
Op.getOperand(7), // soffset
|
||||
Offsets.second, // offset
|
||||
Op.getOperand(8), // cachepolicy
|
||||
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
|
||||
DAG.getConstant(1, DL, MVT::i1), // idxen
|
||||
};
|
||||
EVT VT = Op.getValueType();
|
||||
auto *M = cast<MemSDNode>(Op);
|
||||
|
@ -6806,9 +6806,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
|||
Op.getOperand(5), // voffset
|
||||
Op.getOperand(6), // soffset
|
||||
Op.getOperand(7), // offset
|
||||
DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
|
||||
DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
|
||||
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idexen
|
||||
DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
|
||||
DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
|
||||
DAG.getConstant(IdxEn, DL, MVT::i1), // idexen
|
||||
};
|
||||
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
|
||||
AMDGPUISD::TBUFFER_STORE_FORMAT;
|
||||
|
@ -6833,7 +6833,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
|||
Offsets.second, // offset
|
||||
Op.getOperand(7), // format
|
||||
Op.getOperand(8), // cachepolicy
|
||||
DAG.getTargetConstant(1, DL, MVT::i1), // idexen
|
||||
DAG.getConstant(1, DL, MVT::i1), // idexen
|
||||
};
|
||||
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
|
||||
AMDGPUISD::TBUFFER_STORE_FORMAT;
|
||||
|
@ -6858,7 +6858,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
|||
Offsets.second, // offset
|
||||
Op.getOperand(6), // format
|
||||
Op.getOperand(7), // cachepolicy
|
||||
DAG.getTargetConstant(0, DL, MVT::i1), // idexen
|
||||
DAG.getConstant(0, DL, MVT::i1), // idexen
|
||||
};
|
||||
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
|
||||
AMDGPUISD::TBUFFER_STORE_FORMAT;
|
||||
|
@ -6886,8 +6886,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
|||
SDValue(), // voffset -- will be set by setBufferOffsets
|
||||
SDValue(), // soffset -- will be set by setBufferOffsets
|
||||
SDValue(), // offset -- will be set by setBufferOffsets
|
||||
DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
|
||||
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
|
||||
DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
|
||||
DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
|
||||
};
|
||||
setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
|
||||
unsigned Opc = IntrinsicID == Intrinsic::amdgcn_buffer_store ?
|
||||
|
@ -6932,7 +6932,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
|||
Op.getOperand(5), // soffset
|
||||
Offsets.second, // offset
|
||||
Op.getOperand(6), // cachepolicy
|
||||
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
|
||||
DAG.getConstant(0, DL, MVT::i1), // idxen
|
||||
};
|
||||
unsigned Opc =
|
||||
IsFormat ? AMDGPUISD::BUFFER_STORE_FORMAT : AMDGPUISD::BUFFER_STORE;
|
||||
|
@ -6976,7 +6976,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
|||
Op.getOperand(6), // soffset
|
||||
Offsets.second, // offset
|
||||
Op.getOperand(7), // cachepolicy
|
||||
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
|
||||
DAG.getConstant(1, DL, MVT::i1), // idxen
|
||||
};
|
||||
unsigned Opc = IntrinsicID == Intrinsic::amdgcn_struct_buffer_store ?
|
||||
AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
|
||||
|
@ -7005,8 +7005,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
|||
SDValue(), // voffset -- will be set by setBufferOffsets
|
||||
SDValue(), // soffset -- will be set by setBufferOffsets
|
||||
SDValue(), // offset -- will be set by setBufferOffsets
|
||||
DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
|
||||
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
|
||||
DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
|
||||
DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
|
||||
};
|
||||
setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
|
||||
EVT VT = Op.getOperand(2).getValueType();
|
||||
|
@ -7084,7 +7084,7 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
|
|||
Overflow += ImmOffset;
|
||||
ImmOffset = 0;
|
||||
}
|
||||
C1 = cast<ConstantSDNode>(DAG.getTargetConstant(ImmOffset, DL, MVT::i32));
|
||||
C1 = cast<ConstantSDNode>(DAG.getConstant(ImmOffset, DL, MVT::i32));
|
||||
if (Overflow) {
|
||||
auto OverflowVal = DAG.getConstant(Overflow, DL, MVT::i32);
|
||||
if (!N0)
|
||||
|
@ -7098,7 +7098,7 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
|
|||
if (!N0)
|
||||
N0 = DAG.getConstant(0, DL, MVT::i32);
|
||||
if (!C1)
|
||||
C1 = cast<ConstantSDNode>(DAG.getTargetConstant(0, DL, MVT::i32));
|
||||
C1 = cast<ConstantSDNode>(DAG.getConstant(0, DL, MVT::i32));
|
||||
return {N0, SDValue(C1, 0)};
|
||||
}
|
||||
|
||||
|
@ -7115,7 +7115,7 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
|
|||
if (AMDGPU::splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget, Align)) {
|
||||
Offsets[0] = DAG.getConstant(0, DL, MVT::i32);
|
||||
Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
|
||||
Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
|
||||
Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -7128,13 +7128,13 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
|
|||
Subtarget, Align)) {
|
||||
Offsets[0] = N0;
|
||||
Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
|
||||
Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
|
||||
Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
|
||||
return;
|
||||
}
|
||||
}
|
||||
Offsets[0] = CombinedOffset;
|
||||
Offsets[1] = DAG.getConstant(0, DL, MVT::i32);
|
||||
Offsets[2] = DAG.getTargetConstant(0, DL, MVT::i32);
|
||||
Offsets[2] = DAG.getConstant(0, DL, MVT::i32);
|
||||
}
|
||||
|
||||
// Handle 8 bit and 16 bit buffer loads
|
||||
|
|
|
@ -43,8 +43,8 @@ multiclass V_INTERP_P1_F32_m : VINTRP_m <
|
|||
(outs VINTRPDst:$vdst),
|
||||
(ins VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
|
||||
"v_interp_p1_f32$vdst, $vsrc, $attr$attrchan",
|
||||
[(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 timm:$attrchan),
|
||||
(i32 timm:$attr)))]
|
||||
[(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 imm:$attrchan),
|
||||
(i32 imm:$attr)))]
|
||||
>;
|
||||
|
||||
let OtherPredicates = [has32BankLDS] in {
|
||||
|
@ -66,8 +66,8 @@ defm V_INTERP_P2_F32 : VINTRP_m <
|
|||
(outs VINTRPDst:$vdst),
|
||||
(ins VGPR_32:$src0, VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
|
||||
"v_interp_p2_f32$vdst, $vsrc, $attr$attrchan",
|
||||
[(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 timm:$attrchan),
|
||||
(i32 timm:$attr)))]>;
|
||||
[(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 imm:$attrchan),
|
||||
(i32 imm:$attr)))]>;
|
||||
|
||||
} // End DisableEncoding = "$src0", Constraints = "$src0 = $vdst"
|
||||
|
||||
|
@ -76,8 +76,8 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
|
|||
(outs VINTRPDst:$vdst),
|
||||
(ins InterpSlot:$vsrc, Attr:$attr, AttrChan:$attrchan),
|
||||
"v_interp_mov_f32$vdst, $vsrc, $attr$attrchan",
|
||||
[(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 timm:$attrchan),
|
||||
(i32 timm:$attr)))]>;
|
||||
[(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 imm:$attrchan),
|
||||
(i32 imm:$attr)))]>;
|
||||
|
||||
} // End Uses = [M0, EXEC]
|
||||
|
||||
|
|
|
@ -1090,7 +1090,7 @@ def S_WAKEUP : SOPP <0x00000003, (ins), "s_wakeup"> {
|
|||
|
||||
let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
|
||||
def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16",
|
||||
[(int_amdgcn_s_waitcnt timm:$simm16)]>;
|
||||
[(int_amdgcn_s_waitcnt UIMM16bit:$simm16)]>;
|
||||
def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
|
||||
def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
|
||||
|
||||
|
@ -1099,7 +1099,7 @@ def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
|
|||
// maximum reported is 960 cycles, so 960 / 64 = 15 max, so is the
|
||||
// maximum really 15 on VI?
|
||||
def S_SLEEP : SOPP <0x0000000e, (ins i32imm:$simm16),
|
||||
"s_sleep $simm16", [(int_amdgcn_s_sleep timm:$simm16)]> {
|
||||
"s_sleep $simm16", [(int_amdgcn_s_sleep SIMM16bit:$simm16)]> {
|
||||
let hasSideEffects = 1;
|
||||
let mayLoad = 1;
|
||||
let mayStore = 1;
|
||||
|
@ -1110,10 +1110,10 @@ def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">;
|
|||
let Uses = [EXEC, M0] in {
|
||||
// FIXME: Should this be mayLoad+mayStore?
|
||||
def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
|
||||
[(int_amdgcn_s_sendmsg (i32 timm:$simm16), M0)]>;
|
||||
[(int_amdgcn_s_sendmsg (i32 imm:$simm16), M0)]>;
|
||||
|
||||
def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16",
|
||||
[(int_amdgcn_s_sendmsghalt (i32 timm:$simm16), M0)]>;
|
||||
[(int_amdgcn_s_sendmsghalt (i32 imm:$simm16), M0)]>;
|
||||
|
||||
} // End Uses = [EXEC, M0]
|
||||
|
||||
|
@ -1125,13 +1125,13 @@ def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
|
|||
let simm16 = 0;
|
||||
}
|
||||
def S_INCPERFLEVEL : SOPP <0x00000014, (ins i32imm:$simm16), "s_incperflevel $simm16",
|
||||
[(int_amdgcn_s_incperflevel timm:$simm16)]> {
|
||||
[(int_amdgcn_s_incperflevel SIMM16bit:$simm16)]> {
|
||||
let hasSideEffects = 1;
|
||||
let mayLoad = 1;
|
||||
let mayStore = 1;
|
||||
}
|
||||
def S_DECPERFLEVEL : SOPP <0x00000015, (ins i32imm:$simm16), "s_decperflevel $simm16",
|
||||
[(int_amdgcn_s_decperflevel timm:$simm16)]> {
|
||||
[(int_amdgcn_s_decperflevel SIMM16bit:$simm16)]> {
|
||||
let hasSideEffects = 1;
|
||||
let mayLoad = 1;
|
||||
let mayStore = 1;
|
||||
|
@ -1180,7 +1180,7 @@ let SubtargetPredicate = isGFX10Plus in {
|
|||
// S_GETREG_B32 Intrinsic Pattern.
|
||||
//===----------------------------------------------------------------------===//
|
||||
def : GCNPat <
|
||||
(int_amdgcn_s_getreg timm:$simm16),
|
||||
(int_amdgcn_s_getreg imm:$simm16),
|
||||
(S_GETREG_B32 (as_i16imm $simm16))
|
||||
>;
|
||||
|
||||
|
|
|
@ -841,16 +841,16 @@ def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo<VReg_512>;
|
|||
let OtherPredicates = [isGFX8GFX9] in {
|
||||
|
||||
def : GCNPat <
|
||||
(i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask,
|
||||
timm:$bound_ctrl)),
|
||||
(i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
|
||||
imm:$bound_ctrl)),
|
||||
(V_MOV_B32_dpp $src, $src, (as_i32imm $dpp_ctrl),
|
||||
(as_i32imm $row_mask), (as_i32imm $bank_mask),
|
||||
(as_i1imm $bound_ctrl))
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl, timm:$row_mask,
|
||||
timm:$bank_mask, timm:$bound_ctrl)),
|
||||
(i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
|
||||
imm:$bank_mask, imm:$bound_ctrl)),
|
||||
(V_MOV_B32_dpp $old, $src, (as_i32imm $dpp_ctrl),
|
||||
(as_i32imm $row_mask), (as_i32imm $bank_mask),
|
||||
(as_i1imm $bound_ctrl))
|
||||
|
@ -911,21 +911,21 @@ defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
|
|||
|
||||
let OtherPredicates = [isGFX10Plus] in {
|
||||
def : GCNPat <
|
||||
(i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
|
||||
(i32 (int_amdgcn_mov_dpp8 i32:$src, imm:$dpp8)),
|
||||
(V_MOV_B32_dpp8_gfx10 $src, $src, (as_i32imm $dpp8), (i32 DPP8Mode.FI_0))
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask,
|
||||
timm:$bound_ctrl)),
|
||||
(i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
|
||||
imm:$bound_ctrl)),
|
||||
(V_MOV_B32_dpp_gfx10 $src, $src, (as_i32imm $dpp_ctrl),
|
||||
(as_i32imm $row_mask), (as_i32imm $bank_mask),
|
||||
(as_i1imm $bound_ctrl), (i32 0))
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl, timm:$row_mask,
|
||||
timm:$bank_mask, timm:$bound_ctrl)),
|
||||
(i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
|
||||
imm:$bank_mask, imm:$bound_ctrl)),
|
||||
(V_MOV_B32_dpp_gfx10 $old, $src, (as_i32imm $dpp_ctrl),
|
||||
(as_i32imm $row_mask), (as_i32imm $bank_mask),
|
||||
(as_i1imm $bound_ctrl), (i32 0))
|
||||
|
|
|
@ -112,7 +112,7 @@ class getVOP3ClampPat<VOPProfile P, SDPatternOperator node> {
|
|||
|
||||
class getVOP3MAIPat<VOPProfile P, SDPatternOperator node> {
|
||||
list<dag> ret = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2,
|
||||
timm:$cbsz, timm:$abid, timm:$blgp))];
|
||||
imm:$cbsz, imm:$abid, imm:$blgp))];
|
||||
}
|
||||
|
||||
class VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit VOP3Only = 0> :
|
||||
|
@ -453,13 +453,13 @@ let FPDPRounding = 1 in {
|
|||
def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
|
||||
let Uses = [M0, EXEC] in {
|
||||
def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>,
|
||||
[(set f16:$vdst, (AMDGPUinterp_p2_f16 f32:$src0, (i32 timm:$attrchan),
|
||||
(i32 timm:$attr),
|
||||
(i32 timm:$src0_modifiers),
|
||||
[(set f16:$vdst, (AMDGPUinterp_p2_f16 f32:$src0, (i32 imm:$attrchan),
|
||||
(i32 imm:$attr),
|
||||
(i32 imm:$src0_modifiers),
|
||||
(f32 VRegSrc_32:$src2),
|
||||
(i32 timm:$src2_modifiers),
|
||||
(i1 timm:$high),
|
||||
(i1 timm:$clamp)))]>;
|
||||
(i32 imm:$src2_modifiers),
|
||||
(i1 imm:$high),
|
||||
(i1 imm:$clamp)))]>;
|
||||
} // End Uses = [M0, EXEC]
|
||||
} // End FPDPRounding = 1
|
||||
} // End renamedInGFX9 = 1
|
||||
|
@ -478,21 +478,21 @@ def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f1
|
|||
|
||||
let Uses = [M0, EXEC], FPDPRounding = 1 in {
|
||||
def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>,
|
||||
[(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 timm:$attrchan),
|
||||
(i32 timm:$attr),
|
||||
(i32 timm:$src0_modifiers),
|
||||
(i1 timm:$high),
|
||||
(i1 timm:$clamp),
|
||||
(i32 timm:$omod)))]>;
|
||||
[(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 imm:$attrchan),
|
||||
(i32 imm:$attr),
|
||||
(i32 imm:$src0_modifiers),
|
||||
(i1 imm:$high),
|
||||
(i1 imm:$clamp),
|
||||
(i32 imm:$omod)))]>;
|
||||
def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>,
|
||||
[(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 timm:$attrchan),
|
||||
(i32 timm:$attr),
|
||||
(i32 timm:$src0_modifiers),
|
||||
[(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 imm:$attrchan),
|
||||
(i32 imm:$attr),
|
||||
(i32 imm:$src0_modifiers),
|
||||
(f32 VRegSrc_32:$src2),
|
||||
(i32 timm:$src2_modifiers),
|
||||
(i1 timm:$high),
|
||||
(i1 timm:$clamp),
|
||||
(i32 timm:$omod)))]>;
|
||||
(i32 imm:$src2_modifiers),
|
||||
(i1 imm:$high),
|
||||
(i1 imm:$clamp),
|
||||
(i32 imm:$omod)))]>;
|
||||
} // End Uses = [M0, EXEC], FPDPRounding = 1
|
||||
|
||||
} // End SubtargetPredicate = Has16BitInsts, isCommutable = 1
|
||||
|
@ -642,11 +642,11 @@ let SubtargetPredicate = isGFX10Plus in {
|
|||
} // End $vdst = $vdst_in, DisableEncoding $vdst_in
|
||||
|
||||
def : GCNPat<
|
||||
(int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc),
|
||||
(int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc),
|
||||
(V_PERMLANE16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
|
||||
>;
|
||||
def : GCNPat<
|
||||
(int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc),
|
||||
(int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc),
|
||||
(V_PERMLANEX16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
|
||||
>;
|
||||
} // End SubtargetPredicate = isGFX10Plus
|
||||
|
|
|
@ -3116,12 +3116,12 @@ ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
|
|||
|
||||
// Load the current TEB (thread environment block)
|
||||
SDValue Ops[] = {Chain,
|
||||
DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
|
||||
DAG.getTargetConstant(15, DL, MVT::i32),
|
||||
DAG.getTargetConstant(0, DL, MVT::i32),
|
||||
DAG.getTargetConstant(13, DL, MVT::i32),
|
||||
DAG.getTargetConstant(0, DL, MVT::i32),
|
||||
DAG.getTargetConstant(2, DL, MVT::i32)};
|
||||
DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
|
||||
DAG.getConstant(15, DL, MVT::i32),
|
||||
DAG.getConstant(0, DL, MVT::i32),
|
||||
DAG.getConstant(13, DL, MVT::i32),
|
||||
DAG.getConstant(0, DL, MVT::i32),
|
||||
DAG.getConstant(2, DL, MVT::i32)};
|
||||
SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
|
||||
DAG.getVTList(MVT::i32, MVT::Other), Ops);
|
||||
|
||||
|
@ -8910,12 +8910,12 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
|
|||
// Under Power Management extensions, the cycle-count is:
|
||||
// mrc p15, #0, <Rt>, c9, c13, #0
|
||||
SDValue Ops[] = { N->getOperand(0), // Chain
|
||||
DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
|
||||
DAG.getTargetConstant(15, DL, MVT::i32),
|
||||
DAG.getTargetConstant(0, DL, MVT::i32),
|
||||
DAG.getTargetConstant(9, DL, MVT::i32),
|
||||
DAG.getTargetConstant(13, DL, MVT::i32),
|
||||
DAG.getTargetConstant(0, DL, MVT::i32)
|
||||
DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
|
||||
DAG.getConstant(15, DL, MVT::i32),
|
||||
DAG.getConstant(0, DL, MVT::i32),
|
||||
DAG.getConstant(9, DL, MVT::i32),
|
||||
DAG.getConstant(13, DL, MVT::i32),
|
||||
DAG.getConstant(0, DL, MVT::i32)
|
||||
};
|
||||
|
||||
SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
|
||||
|
|
|
@ -5110,8 +5110,8 @@ def SWPB: AIswp<1, (outs GPRnopc:$Rt),
|
|||
def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
|
||||
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
|
||||
NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
|
||||
[(int_arm_cdp timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn,
|
||||
timm:$CRm, timm:$opc2)]>,
|
||||
[(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
|
||||
imm:$CRm, imm:$opc2)]>,
|
||||
Requires<[IsARM,PreV8]> {
|
||||
bits<4> opc1;
|
||||
bits<4> CRn;
|
||||
|
@ -5134,8 +5134,8 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
|
|||
def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
|
||||
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
|
||||
NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
|
||||
[(int_arm_cdp2 timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn,
|
||||
timm:$CRm, timm:$opc2)]>,
|
||||
[(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
|
||||
imm:$CRm, imm:$opc2)]>,
|
||||
Requires<[IsARM,PreV8]> {
|
||||
let Inst{31-28} = 0b1111;
|
||||
bits<4> opc1;
|
||||
|
@ -5314,15 +5314,15 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm, list<dag> pattern> {
|
|||
}
|
||||
}
|
||||
|
||||
defm LDC : LdStCop <1, 0, "ldc", [(int_arm_ldc timm:$cop, timm:$CRd, addrmode5:$addr)]>;
|
||||
defm LDCL : LdStCop <1, 1, "ldcl", [(int_arm_ldcl timm:$cop, timm:$CRd, addrmode5:$addr)]>;
|
||||
defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
|
||||
defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
|
||||
defm LDC : LdStCop <1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
|
||||
defm LDCL : LdStCop <1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
|
||||
defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
|
||||
defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
|
||||
|
||||
defm STC : LdStCop <0, 0, "stc", [(int_arm_stc timm:$cop, timm:$CRd, addrmode5:$addr)]>;
|
||||
defm STCL : LdStCop <0, 1, "stcl", [(int_arm_stcl timm:$cop, timm:$CRd, addrmode5:$addr)]>;
|
||||
defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
|
||||
defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
|
||||
defm STC : LdStCop <0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
|
||||
defm STCL : LdStCop <0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
|
||||
defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
|
||||
defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
|
||||
|
||||
} // DecoderNamespace = "CoProc"
|
||||
|
||||
|
@ -5358,8 +5358,8 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
|
|||
(outs),
|
||||
(ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
|
||||
c_imm:$CRm, imm0_7:$opc2),
|
||||
[(int_arm_mcr timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn,
|
||||
timm:$CRm, timm:$opc2)]>,
|
||||
[(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
|
||||
imm:$CRm, imm:$opc2)]>,
|
||||
ComplexDeprecationPredicate<"MCR">;
|
||||
def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
|
||||
(MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
|
||||
|
@ -5372,8 +5372,8 @@ def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm",
|
|||
(MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
|
||||
c_imm:$CRm, 0, pred:$p)>;
|
||||
|
||||
def : ARMPat<(int_arm_mrc timm:$cop, timm:$opc1, timm:$CRn, timm:$CRm, timm:$opc2),
|
||||
(MRC p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>;
|
||||
def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
|
||||
(MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
|
||||
|
||||
class MovRCopro2<string opc, bit direction, dag oops, dag iops,
|
||||
list<dag> pattern>
|
||||
|
@ -5404,8 +5404,8 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
|
|||
(outs),
|
||||
(ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
|
||||
c_imm:$CRm, imm0_7:$opc2),
|
||||
[(int_arm_mcr2 timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn,
|
||||
timm:$CRm, timm:$opc2)]>,
|
||||
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
|
||||
imm:$CRm, imm:$opc2)]>,
|
||||
Requires<[IsARM,PreV8]>;
|
||||
def : ARMInstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm",
|
||||
(MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
|
||||
|
@ -5419,9 +5419,9 @@ def : ARMInstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm",
|
|||
(MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
|
||||
c_imm:$CRm, 0)>;
|
||||
|
||||
def : ARMV5TPat<(int_arm_mrc2 timm:$cop, timm:$opc1, timm:$CRn,
|
||||
timm:$CRm, timm:$opc2),
|
||||
(MRC2 p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>;
|
||||
def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
|
||||
imm:$CRm, imm:$opc2),
|
||||
(MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
|
||||
|
||||
class MovRRCopro<string opc, bit direction, dag oops, dag iops, list<dag>
|
||||
pattern = []>
|
||||
|
@ -5447,8 +5447,8 @@ class MovRRCopro<string opc, bit direction, dag oops, dag iops, list<dag>
|
|||
def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */,
|
||||
(outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt,
|
||||
GPRnopc:$Rt2, c_imm:$CRm),
|
||||
[(int_arm_mcrr timm:$cop, timm:$opc1, GPRnopc:$Rt,
|
||||
GPRnopc:$Rt2, timm:$CRm)]>;
|
||||
[(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt,
|
||||
GPRnopc:$Rt2, imm:$CRm)]>;
|
||||
def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */,
|
||||
(outs GPRnopc:$Rt, GPRnopc:$Rt2),
|
||||
(ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm), []>;
|
||||
|
@ -5480,8 +5480,8 @@ class MovRRCopro2<string opc, bit direction, dag oops, dag iops,
|
|||
def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */,
|
||||
(outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt,
|
||||
GPRnopc:$Rt2, c_imm:$CRm),
|
||||
[(int_arm_mcrr2 timm:$cop, timm:$opc1, GPRnopc:$Rt,
|
||||
GPRnopc:$Rt2, timm:$CRm)]>;
|
||||
[(int_arm_mcrr2 imm:$cop, imm:$opc1, GPRnopc:$Rt,
|
||||
GPRnopc:$Rt2, imm:$CRm)]>;
|
||||
|
||||
def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */,
|
||||
(outs GPRnopc:$Rt, GPRnopc:$Rt2),
|
||||
|
@ -6159,7 +6159,7 @@ def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>,
|
|||
let mayLoad = 1, mayStore =1, hasSideEffects = 1 in
|
||||
def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
|
||||
NoItinerary,
|
||||
[(set GPR:$Rd, (int_arm_space timm:$size, GPR:$Rn))]>;
|
||||
[(set GPR:$Rd, (int_arm_space imm:$size, GPR:$Rn))]>;
|
||||
|
||||
//===----------------------------------
|
||||
// Atomic cmpxchg for -O0
|
||||
|
|
|
@ -4175,15 +4175,15 @@ multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm, list<dag>
|
|||
}
|
||||
|
||||
let DecoderNamespace = "Thumb2CoProc" in {
|
||||
defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc", [(int_arm_ldc timm:$cop, timm:$CRd, addrmode5:$addr)]>;
|
||||
defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl", [(int_arm_ldcl timm:$cop, timm:$CRd, addrmode5:$addr)]>;
|
||||
defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2", [(int_arm_ldc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
|
||||
defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l", [(int_arm_ldc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
|
||||
defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
|
||||
defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
|
||||
defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
|
||||
defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
|
||||
|
||||
defm t2STC : t2LdStCop<0b1110, 0, 0, "stc", [(int_arm_stc timm:$cop, timm:$CRd, addrmode5:$addr)]>;
|
||||
defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl", [(int_arm_stcl timm:$cop, timm:$CRd, addrmode5:$addr)]>;
|
||||
defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2", [(int_arm_stc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
|
||||
defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l", [(int_arm_stc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
|
||||
defm t2STC : t2LdStCop<0b1110, 0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
|
||||
defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
|
||||
defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
|
||||
defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
|
||||
}
|
||||
|
||||
|
||||
|
@ -4368,8 +4368,8 @@ def t2MCR : t2MovRCopro<0b1110, "mcr", 0,
|
|||
(outs),
|
||||
(ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
|
||||
c_imm:$CRm, imm0_7:$opc2),
|
||||
[(int_arm_mcr timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn,
|
||||
timm:$CRm, timm:$opc2)]>,
|
||||
[(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
|
||||
imm:$CRm, imm:$opc2)]>,
|
||||
ComplexDeprecationPredicate<"MCR">;
|
||||
def : t2InstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
|
||||
(t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
|
||||
|
@ -4377,8 +4377,8 @@ def : t2InstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
|
|||
def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0,
|
||||
(outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
|
||||
c_imm:$CRm, imm0_7:$opc2),
|
||||
[(int_arm_mcr2 timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn,
|
||||
timm:$CRm, timm:$opc2)]> {
|
||||
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
|
||||
imm:$CRm, imm:$opc2)]> {
|
||||
let Predicates = [IsThumb2, PreV8];
|
||||
}
|
||||
def : t2InstAlias<"mcr2${p} $cop, $opc1, $Rt, $CRn, $CRm",
|
||||
|
@ -4402,24 +4402,24 @@ def : t2InstAlias<"mrc2${p} $cop, $opc1, $Rt, $CRn, $CRm",
|
|||
(t2MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
|
||||
c_imm:$CRm, 0, pred:$p)>;
|
||||
|
||||
def : T2v6Pat<(int_arm_mrc timm:$cop, timm:$opc1, timm:$CRn, timm:$CRm, timm:$opc2),
|
||||
(t2MRC p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>;
|
||||
def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
|
||||
(t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
|
||||
|
||||
def : T2v6Pat<(int_arm_mrc2 timm:$cop, timm:$opc1, timm:$CRn, timm:$CRm, timm:$opc2),
|
||||
(t2MRC2 p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>;
|
||||
def : T2v6Pat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
|
||||
(t2MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
|
||||
|
||||
|
||||
/* from ARM core register to coprocessor */
|
||||
def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0, (outs),
|
||||
(ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2,
|
||||
c_imm:$CRm),
|
||||
[(int_arm_mcrr timm:$cop, timm:$opc1, GPR:$Rt, GPR:$Rt2,
|
||||
timm:$CRm)]>;
|
||||
[(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2,
|
||||
imm:$CRm)]>;
|
||||
def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0, (outs),
|
||||
(ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2,
|
||||
c_imm:$CRm),
|
||||
[(int_arm_mcrr2 timm:$cop, timm:$opc1, GPR:$Rt,
|
||||
GPR:$Rt2, timm:$CRm)]> {
|
||||
[(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt,
|
||||
GPR:$Rt2, imm:$CRm)]> {
|
||||
let Predicates = [IsThumb2, PreV8];
|
||||
}
|
||||
|
||||
|
@ -4439,8 +4439,8 @@ def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1, (outs GPR:$Rt, GPR:$Rt2),
|
|||
def t2CDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
|
||||
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
|
||||
"cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
|
||||
[(int_arm_cdp timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn,
|
||||
timm:$CRm, timm:$opc2)]> {
|
||||
[(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
|
||||
imm:$CRm, imm:$opc2)]> {
|
||||
let Inst{27-24} = 0b1110;
|
||||
|
||||
bits<4> opc1;
|
||||
|
@ -4465,8 +4465,8 @@ def t2CDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
|
|||
def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
|
||||
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
|
||||
"cdp2", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
|
||||
[(int_arm_cdp2 timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn,
|
||||
timm:$CRm, timm:$opc2)]> {
|
||||
[(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
|
||||
imm:$CRm, imm:$opc2)]> {
|
||||
let Inst{27-24} = 0b1110;
|
||||
|
||||
bits<4> opc1;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -8,125 +8,120 @@
|
|||
// Automatically generated file, please consult code owner before editing.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
multiclass ImmOpPred<code pred, ValueType vt = i32> {
|
||||
def "" : PatLeaf<(vt imm), pred>;
|
||||
def _timm : PatLeaf<(vt timm), pred>;
|
||||
}
|
||||
|
||||
def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; let RenderMethod = "addSignedImmOperands"; }
|
||||
def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand; let DecoderMethod = "s4_0ImmDecoder"; }
|
||||
defm s4_0ImmPred : ImmOpPred<[{ return isShiftedInt<4, 0>(N->getSExtValue());}]>;
|
||||
def s4_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 0>(N->getSExtValue());}]>;
|
||||
def s29_3ImmOperand : AsmOperandClass { let Name = "s29_3Imm"; let RenderMethod = "addSignedImmOperands"; }
|
||||
def s29_3Imm : Operand<i32> { let ParserMatchClass = s29_3ImmOperand; let DecoderMethod = "s29_3ImmDecoder"; }
|
||||
defm s29_3ImmPred : ImmOpPred<[{ return isShiftedInt<32, 3>(N->getSExtValue());}]>;
|
||||
def s29_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 3>(N->getSExtValue());}]>;
|
||||
def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; let RenderMethod = "addImmOperands"; }
|
||||
def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
|
||||
defm u6_0ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 0>(N->getSExtValue());}]>;
|
||||
def u6_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 0>(N->getSExtValue());}]>;
|
||||
def a30_2ImmOperand : AsmOperandClass { let Name = "a30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
|
||||
def a30_2Imm : Operand<i32> { let ParserMatchClass = a30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
|
||||
defm a30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
|
||||
def a30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
|
||||
def u29_3ImmOperand : AsmOperandClass { let Name = "u29_3Imm"; let RenderMethod = "addImmOperands"; }
|
||||
def u29_3Imm : Operand<i32> { let ParserMatchClass = u29_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
|
||||
defm u29_3ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 3>(N->getSExtValue());}]>;
|
||||
def u29_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 3>(N->getSExtValue());}]>;
|
||||
def s8_0ImmOperand : AsmOperandClass { let Name = "s8_0Imm"; let RenderMethod = "addSignedImmOperands"; }
|
||||
def s8_0Imm : Operand<i32> { let ParserMatchClass = s8_0ImmOperand; let DecoderMethod = "s8_0ImmDecoder"; }
|
||||
defm s8_0ImmPred : ImmOpPred<[{ return isShiftedInt<8, 0>(N->getSExtValue());}]>;
|
||||
def s8_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<8, 0>(N->getSExtValue());}]>;
|
||||
def u32_0ImmOperand : AsmOperandClass { let Name = "u32_0Imm"; let RenderMethod = "addImmOperands"; }
|
||||
def u32_0Imm : Operand<i32> { let ParserMatchClass = u32_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
|
||||
defm u32_0ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 0>(N->getSExtValue());}]>;
|
||||
def u32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 0>(N->getSExtValue());}]>;
|
||||
def u4_2ImmOperand : AsmOperandClass { let Name = "u4_2Imm"; let RenderMethod = "addImmOperands"; }
|
||||
def u4_2Imm : Operand<i32> { let ParserMatchClass = u4_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
|
||||
defm u4_2ImmPred : ImmOpPred<[{ return isShiftedUInt<4, 2>(N->getSExtValue());}]>;
|
||||
def u4_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<4, 2>(N->getSExtValue());}]>;
|
||||
def u3_0ImmOperand : AsmOperandClass { let Name = "u3_0Imm"; let RenderMethod = "addImmOperands"; }
|
||||
def u3_0Imm : Operand<i32> { let ParserMatchClass = u3_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
|
||||
defm u3_0ImmPred : ImmOpPred<[{ return isShiftedUInt<3, 0>(N->getSExtValue());}]>;
|
||||
def u3_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<3, 0>(N->getSExtValue());}]>;
|
||||
def b15_2ImmOperand : AsmOperandClass { let Name = "b15_2Imm"; let RenderMethod = "addSignedImmOperands"; }
|
||||
def b15_2Imm : Operand<OtherVT> { let ParserMatchClass = b15_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
|
||||
defm b15_2ImmPred : ImmOpPred<[{ return isShiftedInt<15, 2>(N->getSExtValue());}]>;
|
||||
def b15_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<15, 2>(N->getSExtValue());}]>;
|
||||
def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; let RenderMethod = "addImmOperands"; }
|
||||
def u11_3Imm : Operand<i32> { let ParserMatchClass = u11_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
|
||||
defm u11_3ImmPred : ImmOpPred<[{ return isShiftedUInt<11, 3>(N->getSExtValue());}]>;
|
||||
def u11_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<11, 3>(N->getSExtValue());}]>;
|
||||
def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; let RenderMethod = "addSignedImmOperands"; }
|
||||
def s4_3Imm : Operand<i32> { let ParserMatchClass = s4_3ImmOperand; let DecoderMethod = "s4_3ImmDecoder"; }
|
||||
defm s4_3ImmPred : ImmOpPred<[{ return isShiftedInt<4, 3>(N->getSExtValue());}]>;
|
||||
def s4_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 3>(N->getSExtValue());}]>;
|
||||
def m32_0ImmOperand : AsmOperandClass { let Name = "m32_0Imm"; let RenderMethod = "addImmOperands"; }
|
||||
def m32_0Imm : Operand<i32> { let ParserMatchClass = m32_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
|
||||
defm m32_0ImmPred : ImmOpPred<[{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
|
||||
def m32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
|
||||
def u3_1ImmOperand : AsmOperandClass { let Name = "u3_1Imm"; let RenderMethod = "addImmOperands"; }
|
||||
def u3_1Imm : Operand<i32> { let ParserMatchClass = u3_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
|
||||
defm u3_1ImmPred : ImmOpPred<[{ return isShiftedUInt<3, 1>(N->getSExtValue());}]>;
|
||||
def u3_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<3, 1>(N->getSExtValue());}]>;
|
||||
def u1_0ImmOperand : AsmOperandClass { let Name = "u1_0Imm"; let RenderMethod = "addImmOperands"; }
|
||||
def u1_0Imm : Operand<i32> { let ParserMatchClass = u1_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
|
||||
defm u1_0ImmPred : ImmOpPred<[{ return isShiftedUInt<1, 0>(N->getSExtValue());}]>;
|
||||
def u1_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<1, 0>(N->getSExtValue());}]>;
|
||||
def s31_1ImmOperand : AsmOperandClass { let Name = "s31_1Imm"; let RenderMethod = "addSignedImmOperands"; }
|
||||
def s31_1Imm : Operand<i32> { let ParserMatchClass = s31_1ImmOperand; let DecoderMethod = "s31_1ImmDecoder"; }
|
||||
defm s31_1ImmPred : ImmOpPred<[{ return isShiftedInt<32, 1>(N->getSExtValue());}]>;
|
||||
def s31_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 1>(N->getSExtValue());}]>;
|
||||
def s3_0ImmOperand : AsmOperandClass { let Name = "s3_0Imm"; let RenderMethod = "addSignedImmOperands"; }
|
||||
def s3_0Imm : Operand<i32> { let ParserMatchClass = s3_0ImmOperand; let DecoderMethod = "s3_0ImmDecoder"; }
|
||||
defm s3_0ImmPred : ImmOpPred<[{ return isShiftedInt<3, 0>(N->getSExtValue());}]>;
|
||||
def s3_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<3, 0>(N->getSExtValue());}]>;
|
||||
def s30_2ImmOperand : AsmOperandClass { let Name = "s30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
|
||||
def s30_2Imm : Operand<i32> { let ParserMatchClass = s30_2ImmOperand; let DecoderMethod = "s30_2ImmDecoder"; }
|
||||
defm s30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
|
||||
def s30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
|
||||
def u4_0ImmOperand : AsmOperandClass { let Name = "u4_0Imm"; let RenderMethod = "addImmOperands"; }
|
||||
def u4_0Imm : Operand<i32> { let ParserMatchClass = u4_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
|
||||
defm u4_0ImmPred : ImmOpPred<[{ return isShiftedUInt<4, 0>(N->getSExtValue());}]>;
|
||||
def u4_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<4, 0>(N->getSExtValue());}]>;
|
||||
def s6_0ImmOperand : AsmOperandClass { let Name = "s6_0Imm"; let RenderMethod = "addSignedImmOperands"; }
|
||||
def s6_0Imm : Operand<i32> { let ParserMatchClass = s6_0ImmOperand; let DecoderMethod = "s6_0ImmDecoder"; }
|
||||
defm s6_0ImmPred : ImmOpPred<[{ return isShiftedInt<6, 0>(N->getSExtValue());}]>;
|
||||
def s6_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<6, 0>(N->getSExtValue());}]>;
|
||||
def u5_3ImmOperand : AsmOperandClass { let Name = "u5_3Imm"; let RenderMethod = "addImmOperands"; }
|
||||
def u5_3Imm : Operand<i32> { let ParserMatchClass = u5_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
|
||||
defm u5_3ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 3>(N->getSExtValue());}]>;
|
||||
def u5_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 3>(N->getSExtValue());}]>;
|
||||
def s32_0ImmOperand : AsmOperandClass { let Name = "s32_0Imm"; let RenderMethod = "addSignedImmOperands"; }
|
||||
def s32_0Imm : Operand<i32> { let ParserMatchClass = s32_0ImmOperand; let DecoderMethod = "s32_0ImmDecoder"; }
|
||||
defm s32_0ImmPred : ImmOpPred<[{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
|
||||
def s32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
|
||||
def s6_3ImmOperand : AsmOperandClass { let Name = "s6_3Imm"; let RenderMethod = "addSignedImmOperands"; }
|
||||
def s6_3Imm : Operand<i32> { let ParserMatchClass = s6_3ImmOperand; let DecoderMethod = "s6_3ImmDecoder"; }
|
||||
defm s6_3ImmPred : ImmOpPred<[{ return isShiftedInt<6, 3>(N->getSExtValue());}]>;
|
||||
def s6_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<6, 3>(N->getSExtValue());}]>;
|
||||
def u10_0ImmOperand : AsmOperandClass { let Name = "u10_0Imm"; let RenderMethod = "addImmOperands"; }
|
||||
def u10_0Imm : Operand<i32> { let ParserMatchClass = u10_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
|
||||
defm u10_0ImmPred : ImmOpPred<[{ return isShiftedUInt<10, 0>(N->getSExtValue());}]>;
|
||||
def u10_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<10, 0>(N->getSExtValue());}]>;
|
||||
def u31_1ImmOperand : AsmOperandClass { let Name = "u31_1Imm"; let RenderMethod = "addImmOperands"; }
|
||||
def u31_1Imm : Operand<i32> { let ParserMatchClass = u31_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
|
||||
defm u31_1ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 1>(N->getSExtValue());}]>;
|
||||
def u31_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 1>(N->getSExtValue());}]>;
|
||||
def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; let RenderMethod = "addSignedImmOperands"; }
|
||||
def s4_1Imm : Operand<i32> { let ParserMatchClass = s4_1ImmOperand; let DecoderMethod = "s4_1ImmDecoder"; }
|
||||
defm s4_1ImmPred : ImmOpPred<[{ return isShiftedInt<4, 1>(N->getSExtValue());}]>;
|
||||
def s4_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 1>(N->getSExtValue());}]>;
|
||||
def u16_0ImmOperand : AsmOperandClass { let Name = "u16_0Imm"; let RenderMethod = "addImmOperands"; }
|
||||
def u16_0Imm : Operand<i32> { let ParserMatchClass = u16_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
|
||||
defm u16_0ImmPred : ImmOpPred<[{ return isShiftedUInt<16, 0>(N->getSExtValue());}]>;
|
||||
def u16_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<16, 0>(N->getSExtValue());}]>;
|
||||
def u6_1ImmOperand : AsmOperandClass { let Name = "u6_1Imm"; let RenderMethod = "addImmOperands"; }
|
||||
def u6_1Imm : Operand<i32> { let ParserMatchClass = u6_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
|
||||
defm u6_1ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 1>(N->getSExtValue());}]>;
|
||||
def u6_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 1>(N->getSExtValue());}]>;
|
||||
def u5_2ImmOperand : AsmOperandClass { let Name = "u5_2Imm"; let RenderMethod = "addImmOperands"; }
|
||||
def u5_2Imm : Operand<i32> { let ParserMatchClass = u5_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
|
||||
defm u5_2ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 2>(N->getSExtValue());}]>;
|
||||
def u5_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 2>(N->getSExtValue());}]>;
|
||||
def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; let RenderMethod = "addImmOperands"; }
|
||||
def u26_6Imm : Operand<i32> { let ParserMatchClass = u26_6ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
|
||||
defm u26_6ImmPred : ImmOpPred<[{ return isShiftedUInt<26, 6>(N->getSExtValue());}]>;
|
||||
def u26_6ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<26, 6>(N->getSExtValue());}]>;
|
||||
def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; let RenderMethod = "addImmOperands"; }
|
||||
def u6_2Imm : Operand<i32> { let ParserMatchClass = u6_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
|
||||
defm u6_2ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 2>(N->getSExtValue());}]>;
|
||||
def u6_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 2>(N->getSExtValue());}]>;
|
||||
def u7_0ImmOperand : AsmOperandClass { let Name = "u7_0Imm"; let RenderMethod = "addImmOperands"; }
|
||||
def u7_0Imm : Operand<i32> { let ParserMatchClass = u7_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
|
||||
defm u7_0ImmPred : ImmOpPred<[{ return isShiftedUInt<7, 0>(N->getSExtValue());}]>;
|
||||
def u7_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<7, 0>(N->getSExtValue());}]>;
|
||||
def b13_2ImmOperand : AsmOperandClass { let Name = "b13_2Imm"; let RenderMethod = "addSignedImmOperands"; }
|
||||
def b13_2Imm : Operand<OtherVT> { let ParserMatchClass = b13_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
|
||||
defm b13_2ImmPred : ImmOpPred<[{ return isShiftedInt<13, 2>(N->getSExtValue());}]>;
|
||||
def b13_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<13, 2>(N->getSExtValue());}]>;
|
||||
def u5_0ImmOperand : AsmOperandClass { let Name = "u5_0Imm"; let RenderMethod = "addImmOperands"; }
|
||||
def u5_0Imm : Operand<i32> { let ParserMatchClass = u5_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
|
||||
defm u5_0ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 0>(N->getSExtValue());}]>;
|
||||
def u5_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 0>(N->getSExtValue());}]>;
|
||||
def u2_0ImmOperand : AsmOperandClass { let Name = "u2_0Imm"; let RenderMethod = "addImmOperands"; }
|
||||
def u2_0Imm : Operand<i32> { let ParserMatchClass = u2_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
|
||||
defm u2_0ImmPred : ImmOpPred<[{ return isShiftedUInt<2, 0>(N->getSExtValue());}]>;
|
||||
def u2_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<2, 0>(N->getSExtValue());}]>;
|
||||
def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; let RenderMethod = "addSignedImmOperands"; }
|
||||
def s4_2Imm : Operand<i32> { let ParserMatchClass = s4_2ImmOperand; let DecoderMethod = "s4_2ImmDecoder"; }
|
||||
defm s4_2ImmPred : ImmOpPred<[{ return isShiftedInt<4, 2>(N->getSExtValue());}]>;
|
||||
def s4_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 2>(N->getSExtValue());}]>;
|
||||
def b30_2ImmOperand : AsmOperandClass { let Name = "b30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
|
||||
def b30_2Imm : Operand<OtherVT> { let ParserMatchClass = b30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
|
||||
defm b30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
|
||||
def b30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
|
||||
def u8_0ImmOperand : AsmOperandClass { let Name = "u8_0Imm"; let RenderMethod = "addImmOperands"; }
|
||||
def u8_0Imm : Operand<i32> { let ParserMatchClass = u8_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
|
||||
defm u8_0ImmPred : ImmOpPred<[{ return isShiftedUInt<8, 0>(N->getSExtValue());}]>;
|
||||
def u8_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<8, 0>(N->getSExtValue());}]>;
|
||||
def u30_2ImmOperand : AsmOperandClass { let Name = "u30_2Imm"; let RenderMethod = "addImmOperands"; }
|
||||
def u30_2Imm : Operand<i32> { let ParserMatchClass = u30_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
|
||||
defm u30_2ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 2>(N->getSExtValue());}]>;
|
||||
def u30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 2>(N->getSExtValue());}]>;
|
||||
|
|
|
@ -22,14 +22,14 @@ class T_RP_pat <InstHexagon MI, Intrinsic IntID>
|
|||
|
||||
def: Pat<(int_hexagon_A2_add IntRegs:$Rs, IntRegs:$Rt),
|
||||
(A2_add IntRegs:$Rs, IntRegs:$Rt)>;
|
||||
def: Pat<(int_hexagon_A2_addi IntRegs:$Rs, timm:$s16),
|
||||
def: Pat<(int_hexagon_A2_addi IntRegs:$Rs, imm:$s16),
|
||||
(A2_addi IntRegs:$Rs, imm:$s16)>;
|
||||
def: Pat<(int_hexagon_A2_addp DoubleRegs:$Rs, DoubleRegs:$Rt),
|
||||
(A2_addp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
|
||||
|
||||
def: Pat<(int_hexagon_A2_sub IntRegs:$Rs, IntRegs:$Rt),
|
||||
(A2_sub IntRegs:$Rs, IntRegs:$Rt)>;
|
||||
def: Pat<(int_hexagon_A2_subri timm:$s10, IntRegs:$Rs),
|
||||
def: Pat<(int_hexagon_A2_subri imm:$s10, IntRegs:$Rs),
|
||||
(A2_subri imm:$s10, IntRegs:$Rs)>;
|
||||
def: Pat<(int_hexagon_A2_subp DoubleRegs:$Rs, DoubleRegs:$Rt),
|
||||
(A2_subp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
|
||||
|
@ -45,26 +45,26 @@ def: Pat<(int_hexagon_M2_dpmpyss_s0 IntRegs:$Rs, IntRegs:$Rt),
|
|||
def: Pat<(int_hexagon_M2_dpmpyuu_s0 IntRegs:$Rs, IntRegs:$Rt),
|
||||
(M2_dpmpyuu_s0 IntRegs:$Rs, IntRegs:$Rt)>;
|
||||
|
||||
def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$Rs, timm:$u5),
|
||||
def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$Rs, imm:$u5),
|
||||
(S2_asl_i_r IntRegs:$Rs, imm:$u5)>;
|
||||
def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$Rs, timm:$u5),
|
||||
def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$Rs, imm:$u5),
|
||||
(S2_lsr_i_r IntRegs:$Rs, imm:$u5)>;
|
||||
def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$Rs, timm:$u5),
|
||||
def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$Rs, imm:$u5),
|
||||
(S2_asr_i_r IntRegs:$Rs, imm:$u5)>;
|
||||
def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$Rs, timm:$u6),
|
||||
def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$Rs, imm:$u6),
|
||||
(S2_asl_i_p DoubleRegs:$Rs, imm:$u6)>;
|
||||
def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$Rs, timm:$u6),
|
||||
def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$Rs, imm:$u6),
|
||||
(S2_lsr_i_p DoubleRegs:$Rs, imm:$u6)>;
|
||||
def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$Rs, timm:$u6),
|
||||
def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$Rs, imm:$u6),
|
||||
(S2_asr_i_p DoubleRegs:$Rs, imm:$u6)>;
|
||||
|
||||
def: Pat<(int_hexagon_A2_and IntRegs:$Rs, IntRegs:$Rt),
|
||||
(A2_and IntRegs:$Rs, IntRegs:$Rt)>;
|
||||
def: Pat<(int_hexagon_A2_andir IntRegs:$Rs, timm:$s10),
|
||||
def: Pat<(int_hexagon_A2_andir IntRegs:$Rs, imm:$s10),
|
||||
(A2_andir IntRegs:$Rs, imm:$s10)>;
|
||||
def: Pat<(int_hexagon_A2_or IntRegs:$Rs, IntRegs:$Rt),
|
||||
(A2_or IntRegs:$Rs, IntRegs:$Rt)>;
|
||||
def: Pat<(int_hexagon_A2_orir IntRegs:$Rs, timm:$s10),
|
||||
def: Pat<(int_hexagon_A2_orir IntRegs:$Rs, imm:$s10),
|
||||
(A2_orir IntRegs:$Rs, imm:$s10)>;
|
||||
def: Pat<(int_hexagon_A2_xor IntRegs:$Rs, IntRegs:$Rt),
|
||||
(A2_xor IntRegs:$Rs, IntRegs:$Rt)>;
|
||||
|
@ -99,13 +99,13 @@ def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, (i32 0)),
|
|||
(S2_vsathub I64:$Rs)>;
|
||||
}
|
||||
|
||||
def : Pat <(int_hexagon_S2_asr_i_r_rnd_goodsyntax I32:$Rs, u5_0ImmPred_timm:$imm),
|
||||
def : Pat <(int_hexagon_S2_asr_i_r_rnd_goodsyntax I32:$Rs, u5_0ImmPred:$imm),
|
||||
(S2_asr_i_r_rnd I32:$Rs, (UDEC1 u5_0ImmPred:$imm))>;
|
||||
def : Pat <(int_hexagon_S2_asr_i_p_rnd_goodsyntax I64:$Rs, u6_0ImmPred_timm:$imm),
|
||||
def : Pat <(int_hexagon_S2_asr_i_p_rnd_goodsyntax I64:$Rs, u6_0ImmPred:$imm),
|
||||
(S2_asr_i_p_rnd I64:$Rs, (UDEC1 u6_0ImmPred:$imm))>;
|
||||
def : Pat <(int_hexagon_S5_vasrhrnd_goodsyntax I64:$Rs, u4_0ImmPred_timm:$imm),
|
||||
def : Pat <(int_hexagon_S5_vasrhrnd_goodsyntax I64:$Rs, u4_0ImmPred:$imm),
|
||||
(S5_vasrhrnd I64:$Rs, (UDEC1 u4_0ImmPred:$imm))>;
|
||||
def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, u4_0ImmPred_timm:$imm),
|
||||
def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, u4_0ImmPred:$imm),
|
||||
(S5_asrhub_rnd_sat I64:$Rs, (UDEC1 u4_0ImmPred:$imm))>;
|
||||
|
||||
def ImmExt64: SDNodeXForm<imm, [{
|
||||
|
@ -121,13 +121,13 @@ def ImmExt64: SDNodeXForm<imm, [{
|
|||
// To connect the builtin with the instruction, the builtin's operand
|
||||
// needs to be extended to the right type.
|
||||
|
||||
def : Pat<(int_hexagon_A2_tfrpi timm:$Is),
|
||||
def : Pat<(int_hexagon_A2_tfrpi imm:$Is),
|
||||
(A2_tfrpi (ImmExt64 $Is))>;
|
||||
|
||||
def : Pat <(int_hexagon_C2_cmpgei I32:$src1, s32_0ImmPred_timm:$src2),
|
||||
def : Pat <(int_hexagon_C2_cmpgei I32:$src1, s32_0ImmPred:$src2),
|
||||
(C2_tfrpr (C2_cmpgti I32:$src1, (SDEC1 s32_0ImmPred:$src2)))>;
|
||||
|
||||
def : Pat <(int_hexagon_C2_cmpgeui I32:$src1, u32_0ImmPred_timm:$src2),
|
||||
def : Pat <(int_hexagon_C2_cmpgeui I32:$src1, u32_0ImmPred:$src2),
|
||||
(C2_tfrpr (C2_cmpgtui I32:$src1, (UDEC1 u32_0ImmPred:$src2)))>;
|
||||
|
||||
def : Pat <(int_hexagon_C2_cmpgeui I32:$src, 0),
|
||||
|
@ -142,7 +142,7 @@ def : Pat <(int_hexagon_C2_cmpltu I32:$src1, I32:$src2),
|
|||
//===----------------------------------------------------------------------===//
|
||||
class S2op_tableidx_pat <Intrinsic IntID, InstHexagon OutputInst,
|
||||
SDNodeXForm XformImm>
|
||||
: Pat <(IntID I32:$src1, I32:$src2, u4_0ImmPred_timm:$src3, u5_0ImmPred_timm:$src4),
|
||||
: Pat <(IntID I32:$src1, I32:$src2, u4_0ImmPred:$src3, u5_0ImmPred:$src4),
|
||||
(OutputInst I32:$src1, I32:$src2, u4_0ImmPred:$src3,
|
||||
(XformImm u5_0ImmPred:$src4))>;
|
||||
|
||||
|
@ -197,11 +197,11 @@ class T_stc_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Imm, PatLeaf Val>
|
|||
: Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s),
|
||||
(MI I32:$Rs, Imm:$s, I32:$Ru, Val:$Rt)>;
|
||||
|
||||
def: T_stc_pat<S2_storerb_pci, int_hexagon_circ_stb, s4_0ImmPred_timm, I32>;
|
||||
def: T_stc_pat<S2_storerh_pci, int_hexagon_circ_sth, s4_1ImmPred_timm, I32>;
|
||||
def: T_stc_pat<S2_storeri_pci, int_hexagon_circ_stw, s4_2ImmPred_timm, I32>;
|
||||
def: T_stc_pat<S2_storerd_pci, int_hexagon_circ_std, s4_3ImmPred_timm, I64>;
|
||||
def: T_stc_pat<S2_storerf_pci, int_hexagon_circ_sthhi, s4_1ImmPred_timm, I32>;
|
||||
def: T_stc_pat<S2_storerb_pci, int_hexagon_circ_stb, s4_0ImmPred, I32>;
|
||||
def: T_stc_pat<S2_storerh_pci, int_hexagon_circ_sth, s4_1ImmPred, I32>;
|
||||
def: T_stc_pat<S2_storeri_pci, int_hexagon_circ_stw, s4_2ImmPred, I32>;
|
||||
def: T_stc_pat<S2_storerd_pci, int_hexagon_circ_std, s4_3ImmPred, I64>;
|
||||
def: T_stc_pat<S2_storerf_pci, int_hexagon_circ_sthhi, s4_1ImmPred, I32>;
|
||||
|
||||
multiclass MaskedStore <InstHexagon MI, Intrinsic IntID> {
|
||||
def : Pat<(IntID HvxQR:$src1, IntRegs:$src2, HvxVR:$src3),
|
||||
|
|
|
@ -360,7 +360,7 @@ class RDDSP_MM_DESC {
|
|||
dag OutOperandList = (outs GPR32Opnd:$rt);
|
||||
dag InOperandList = (ins uimm7:$mask);
|
||||
string AsmString = !strconcat("rddsp", "\t$rt, $mask");
|
||||
list<dag> Pattern = [(set GPR32Opnd:$rt, (int_mips_rddsp timmZExt7:$mask))];
|
||||
list<dag> Pattern = [(set GPR32Opnd:$rt, (int_mips_rddsp immZExt7:$mask))];
|
||||
InstrItinClass Itinerary = NoItinerary;
|
||||
}
|
||||
|
||||
|
@ -383,7 +383,7 @@ class WRDSP_MM_DESC {
|
|||
dag OutOperandList = (outs);
|
||||
dag InOperandList = (ins GPR32Opnd:$rt, uimm7:$mask);
|
||||
string AsmString = !strconcat("wrdsp", "\t$rt, $mask");
|
||||
list<dag> Pattern = [(int_mips_wrdsp GPR32Opnd:$rt, timmZExt7:$mask)];
|
||||
list<dag> Pattern = [(int_mips_wrdsp GPR32Opnd:$rt, immZExt7:$mask)];
|
||||
InstrItinClass Itinerary = NoItinerary;
|
||||
bit isMoveReg = 1;
|
||||
}
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
|
||||
// shamt must fit in 6 bits.
|
||||
def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>;
|
||||
def timmZExt6 : TImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>;
|
||||
|
||||
// Node immediate fits as 10-bit sign extended on target immediate.
|
||||
// e.g. seqi, snei
|
||||
|
|
|
@ -12,19 +12,12 @@
|
|||
|
||||
// ImmLeaf
|
||||
def immZExt1 : ImmLeaf<i32, [{return isUInt<1>(Imm);}]>;
|
||||
def timmZExt1 : ImmLeaf<i32, [{return isUInt<1>(Imm);}], NOOP_SDNodeXForm, timm>;
|
||||
def immZExt2 : ImmLeaf<i32, [{return isUInt<2>(Imm);}]>;
|
||||
def timmZExt2 : ImmLeaf<i32, [{return isUInt<2>(Imm);}], NOOP_SDNodeXForm, timm>;
|
||||
def immZExt3 : ImmLeaf<i32, [{return isUInt<3>(Imm);}]>;
|
||||
def timmZExt3 : ImmLeaf<i32, [{return isUInt<3>(Imm);}], NOOP_SDNodeXForm, timm>;
|
||||
def immZExt4 : ImmLeaf<i32, [{return isUInt<4>(Imm);}]>;
|
||||
def timmZExt4 : ImmLeaf<i32, [{return isUInt<4>(Imm);}], NOOP_SDNodeXForm, timm>;
|
||||
def immZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}]>;
|
||||
def timmZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}], NOOP_SDNodeXForm, timm>;
|
||||
def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>;
|
||||
def timmZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}], NOOP_SDNodeXForm, timm>;
|
||||
def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>;
|
||||
def timmSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}], NOOP_SDNodeXForm, timm>;
|
||||
def immSExt10 : ImmLeaf<i32, [{return isInt<10>(Imm);}]>;
|
||||
|
||||
// Mips-specific dsp nodes
|
||||
|
@ -313,7 +306,7 @@ class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
|
|||
dag OutOperandList = (outs ROT:$rt);
|
||||
dag InOperandList = (ins ROS:$rs, uimm5:$sa, ROS:$src);
|
||||
string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa");
|
||||
list<dag> Pattern = [(set ROT:$rt, (OpNode ROS:$src, ROS:$rs, timmZExt5:$sa))];
|
||||
list<dag> Pattern = [(set ROT:$rt, (OpNode ROS:$src, ROS:$rs, immZExt5:$sa))];
|
||||
InstrItinClass Itinerary = itin;
|
||||
string Constraints = "$src = $rt";
|
||||
string BaseOpcode = instr_asm;
|
||||
|
@ -450,7 +443,7 @@ class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
|
|||
dag OutOperandList = (outs GPR32Opnd:$rd);
|
||||
dag InOperandList = (ins uimm10:$mask);
|
||||
string AsmString = !strconcat(instr_asm, "\t$rd, $mask");
|
||||
list<dag> Pattern = [(set GPR32Opnd:$rd, (OpNode timmZExt10:$mask))];
|
||||
list<dag> Pattern = [(set GPR32Opnd:$rd, (OpNode immZExt10:$mask))];
|
||||
InstrItinClass Itinerary = itin;
|
||||
string BaseOpcode = instr_asm;
|
||||
bit isMoveReg = 1;
|
||||
|
@ -461,7 +454,7 @@ class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
|
|||
dag OutOperandList = (outs);
|
||||
dag InOperandList = (ins GPR32Opnd:$rs, uimm10:$mask);
|
||||
string AsmString = !strconcat(instr_asm, "\t$rs, $mask");
|
||||
list<dag> Pattern = [(OpNode GPR32Opnd:$rs, timmZExt10:$mask)];
|
||||
list<dag> Pattern = [(OpNode GPR32Opnd:$rs, immZExt10:$mask)];
|
||||
InstrItinClass Itinerary = itin;
|
||||
string BaseOpcode = instr_asm;
|
||||
bit isMoveReg = 1;
|
||||
|
@ -1103,14 +1096,14 @@ class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph,
|
|||
NoItinerary, DSPROpnd>;
|
||||
|
||||
// Misc
|
||||
class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, uimm5, timmZExt5,
|
||||
class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, uimm5, immZExt5,
|
||||
NoItinerary>;
|
||||
|
||||
class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, uimm2, timmZExt2,
|
||||
class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, uimm2, immZExt2,
|
||||
NoItinerary>;
|
||||
|
||||
class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, uimm5,
|
||||
timmZExt5, NoItinerary>;
|
||||
immZExt5, NoItinerary>;
|
||||
|
||||
// Pseudos.
|
||||
def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32,
|
||||
|
|
|
@ -1263,7 +1263,6 @@ def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>;
|
|||
|
||||
// Node immediate fits as 7-bit zero extended on target immediate.
|
||||
def immZExt7 : PatLeaf<(imm), [{ return isUInt<7>(N->getZExtValue()); }]>;
|
||||
def timmZExt7 : PatLeaf<(timm), [{ return isUInt<7>(N->getZExtValue()); }]>;
|
||||
|
||||
// Node immediate fits as 16-bit zero extended on target immediate.
|
||||
// The LO16 param means that only the lower 16 bits of the node
|
||||
|
@ -1296,7 +1295,6 @@ def immZExt32 : PatLeaf<(imm), [{ return isUInt<32>(N->getZExtValue()); }]>;
|
|||
|
||||
// shamt field must fit in 5 bits.
|
||||
def immZExt5 : ImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>;
|
||||
def timmZExt5 : TImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>;
|
||||
|
||||
def immZExt5Plus1 : PatLeaf<(imm), [{
|
||||
return isUInt<5>(N->getZExtValue() - 1);
|
||||
|
|
|
@ -60,11 +60,6 @@ def immZExt2Ptr : ImmLeaf<iPTR, [{return isUInt<2>(Imm);}]>;
|
|||
def immZExt3Ptr : ImmLeaf<iPTR, [{return isUInt<3>(Imm);}]>;
|
||||
def immZExt4Ptr : ImmLeaf<iPTR, [{return isUInt<4>(Imm);}]>;
|
||||
|
||||
def timmZExt1Ptr : TImmLeaf<iPTR, [{return isUInt<1>(Imm);}]>;
|
||||
def timmZExt2Ptr : TImmLeaf<iPTR, [{return isUInt<2>(Imm);}]>;
|
||||
def timmZExt3Ptr : TImmLeaf<iPTR, [{return isUInt<3>(Imm);}]>;
|
||||
def timmZExt4Ptr : TImmLeaf<iPTR, [{return isUInt<4>(Imm);}]>;
|
||||
|
||||
// Operands
|
||||
|
||||
def immZExt2Lsa : ImmLeaf<i32, [{return isUInt<2>(Imm - 1);}]>;
|
||||
|
@ -1275,7 +1270,7 @@ class MSA_I8_SHF_DESC_BASE<string instr_asm, RegisterOperand ROWD,
|
|||
dag OutOperandList = (outs ROWD:$wd);
|
||||
dag InOperandList = (ins ROWS:$ws, uimm8:$u8);
|
||||
string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u8");
|
||||
list<dag> Pattern = [(set ROWD:$wd, (MipsSHF timmZExt8:$u8, ROWS:$ws))];
|
||||
list<dag> Pattern = [(set ROWD:$wd, (MipsSHF immZExt8:$u8, ROWS:$ws))];
|
||||
InstrItinClass Itinerary = itin;
|
||||
}
|
||||
|
||||
|
@ -2304,13 +2299,13 @@ class INSERT_FW_VIDX64_PSEUDO_DESC :
|
|||
class INSERT_FD_VIDX64_PSEUDO_DESC :
|
||||
MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v2f64, MSA128DOpnd, FGR64Opnd, GPR64Opnd>;
|
||||
|
||||
class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, uimm4, timmZExt4,
|
||||
class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, uimm4, immZExt4,
|
||||
MSA128BOpnd>;
|
||||
class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, uimm3, timmZExt3,
|
||||
class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, uimm3, immZExt3,
|
||||
MSA128HOpnd>;
|
||||
class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, uimm2, timmZExt2,
|
||||
class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, uimm2, immZExt2,
|
||||
MSA128WOpnd>;
|
||||
class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, uimm1, timmZExt1,
|
||||
class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, uimm1, immZExt1,
|
||||
MSA128DOpnd>;
|
||||
|
||||
class LD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
|
||||
|
@ -2523,22 +2518,22 @@ class PCNT_W_DESC : MSA_2R_DESC_BASE<"pcnt.w", ctpop, MSA128WOpnd>;
|
|||
class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", ctpop, MSA128DOpnd>;
|
||||
|
||||
class SAT_S_B_DESC : MSA_BIT_X_DESC_BASE<"sat_s.b", int_mips_sat_s_b, uimm3,
|
||||
timmZExt3, MSA128BOpnd>;
|
||||
immZExt3, MSA128BOpnd>;
|
||||
class SAT_S_H_DESC : MSA_BIT_X_DESC_BASE<"sat_s.h", int_mips_sat_s_h, uimm4,
|
||||
timmZExt4, MSA128HOpnd>;
|
||||
immZExt4, MSA128HOpnd>;
|
||||
class SAT_S_W_DESC : MSA_BIT_X_DESC_BASE<"sat_s.w", int_mips_sat_s_w, uimm5,
|
||||
timmZExt5, MSA128WOpnd>;
|
||||
immZExt5, MSA128WOpnd>;
|
||||
class SAT_S_D_DESC : MSA_BIT_X_DESC_BASE<"sat_s.d", int_mips_sat_s_d, uimm6,
|
||||
timmZExt6, MSA128DOpnd>;
|
||||
immZExt6, MSA128DOpnd>;
|
||||
|
||||
class SAT_U_B_DESC : MSA_BIT_X_DESC_BASE<"sat_u.b", int_mips_sat_u_b, uimm3,
|
||||
timmZExt3, MSA128BOpnd>;
|
||||
immZExt3, MSA128BOpnd>;
|
||||
class SAT_U_H_DESC : MSA_BIT_X_DESC_BASE<"sat_u.h", int_mips_sat_u_h, uimm4,
|
||||
timmZExt4, MSA128HOpnd>;
|
||||
immZExt4, MSA128HOpnd>;
|
||||
class SAT_U_W_DESC : MSA_BIT_X_DESC_BASE<"sat_u.w", int_mips_sat_u_w, uimm5,
|
||||
timmZExt5, MSA128WOpnd>;
|
||||
immZExt5, MSA128WOpnd>;
|
||||
class SAT_U_D_DESC : MSA_BIT_X_DESC_BASE<"sat_u.d", int_mips_sat_u_d, uimm6,
|
||||
timmZExt6, MSA128DOpnd>;
|
||||
immZExt6, MSA128DOpnd>;
|
||||
|
||||
class SHF_B_DESC : MSA_I8_SHF_DESC_BASE<"shf.b", MSA128BOpnd>;
|
||||
class SHF_H_DESC : MSA_I8_SHF_DESC_BASE<"shf.h", MSA128HOpnd>;
|
||||
|
@ -2551,16 +2546,16 @@ class SLD_D_DESC : MSA_3R_SLD_DESC_BASE<"sld.d", int_mips_sld_d, MSA128DOpnd>;
|
|||
|
||||
class SLDI_B_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.b", int_mips_sldi_b,
|
||||
MSA128BOpnd, MSA128BOpnd, uimm4,
|
||||
timmZExt4>;
|
||||
immZExt4>;
|
||||
class SLDI_H_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.h", int_mips_sldi_h,
|
||||
MSA128HOpnd, MSA128HOpnd, uimm3,
|
||||
timmZExt3>;
|
||||
immZExt3>;
|
||||
class SLDI_W_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.w", int_mips_sldi_w,
|
||||
MSA128WOpnd, MSA128WOpnd, uimm2,
|
||||
timmZExt2>;
|
||||
immZExt2>;
|
||||
class SLDI_D_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.d", int_mips_sldi_d,
|
||||
MSA128DOpnd, MSA128DOpnd, uimm1,
|
||||
timmZExt1>;
|
||||
immZExt1>;
|
||||
|
||||
class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", shl, MSA128BOpnd>;
|
||||
class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", shl, MSA128HOpnd>;
|
||||
|
@ -2614,13 +2609,13 @@ class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, MSA128WOpnd>;
|
|||
class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, MSA128DOpnd>;
|
||||
|
||||
class SRARI_B_DESC : MSA_BIT_X_DESC_BASE<"srari.b", int_mips_srari_b, uimm3,
|
||||
timmZExt3, MSA128BOpnd>;
|
||||
immZExt3, MSA128BOpnd>;
|
||||
class SRARI_H_DESC : MSA_BIT_X_DESC_BASE<"srari.h", int_mips_srari_h, uimm4,
|
||||
timmZExt4, MSA128HOpnd>;
|
||||
immZExt4, MSA128HOpnd>;
|
||||
class SRARI_W_DESC : MSA_BIT_X_DESC_BASE<"srari.w", int_mips_srari_w, uimm5,
|
||||
timmZExt5, MSA128WOpnd>;
|
||||
immZExt5, MSA128WOpnd>;
|
||||
class SRARI_D_DESC : MSA_BIT_X_DESC_BASE<"srari.d", int_mips_srari_d, uimm6,
|
||||
timmZExt6, MSA128DOpnd>;
|
||||
immZExt6, MSA128DOpnd>;
|
||||
|
||||
class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", srl, MSA128BOpnd>;
|
||||
class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, MSA128HOpnd>;
|
||||
|
@ -2642,13 +2637,13 @@ class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w, MSA128WOpnd>;
|
|||
class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, MSA128DOpnd>;
|
||||
|
||||
class SRLRI_B_DESC : MSA_BIT_X_DESC_BASE<"srlri.b", int_mips_srlri_b, uimm3,
|
||||
timmZExt3, MSA128BOpnd>;
|
||||
immZExt3, MSA128BOpnd>;
|
||||
class SRLRI_H_DESC : MSA_BIT_X_DESC_BASE<"srlri.h", int_mips_srlri_h, uimm4,
|
||||
timmZExt4, MSA128HOpnd>;
|
||||
immZExt4, MSA128HOpnd>;
|
||||
class SRLRI_W_DESC : MSA_BIT_X_DESC_BASE<"srlri.w", int_mips_srlri_w, uimm5,
|
||||
timmZExt5, MSA128WOpnd>;
|
||||
immZExt5, MSA128WOpnd>;
|
||||
class SRLRI_D_DESC : MSA_BIT_X_DESC_BASE<"srlri.d", int_mips_srlri_d, uimm6,
|
||||
timmZExt6, MSA128DOpnd>;
|
||||
immZExt6, MSA128DOpnd>;
|
||||
|
||||
class ST_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
|
||||
ValueType TyNode, RegisterOperand ROWD,
|
||||
|
|
|
@ -2596,8 +2596,7 @@ static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
|
|||
|
||||
SDLoc DL(Op);
|
||||
return DAG.getNode(MipsISD::SHF, DL, ResTy,
|
||||
DAG.getTargetConstant(Imm, DL, MVT::i32),
|
||||
Op->getOperand(0));
|
||||
DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0));
|
||||
}
|
||||
|
||||
/// Determine whether a range fits a regular pattern of values.
|
||||
|
|
|
@ -331,7 +331,7 @@ class VXBX_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
|
|||
class VXCR_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
|
||||
: VXForm_CR<xo, (outs vrrc:$vD), (ins vrrc:$vA, u1imm:$ST, u4imm:$SIX),
|
||||
!strconcat(opc, " $vD, $vA, $ST, $SIX"), IIC_VecFP,
|
||||
[(set Ty:$vD, (IntID Ty:$vA, timm:$ST, timm:$SIX))]>;
|
||||
[(set Ty:$vD, (IntID Ty:$vA, imm:$ST, imm:$SIX))]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Instruction Definitions.
|
||||
|
@ -401,10 +401,10 @@ let isCodeGenOnly = 1 in {
|
|||
|
||||
def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
|
||||
"mfvscr $vD", IIC_LdStStore,
|
||||
[(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
|
||||
[(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
|
||||
def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
|
||||
"mtvscr $vB", IIC_LdStLoad,
|
||||
[(int_ppc_altivec_mtvscr v4i32:$vB)]>;
|
||||
[(int_ppc_altivec_mtvscr v4i32:$vB)]>;
|
||||
|
||||
let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { // Loads.
|
||||
def LVEBX: XForm_1_memOp<31, 7, (outs vrrc:$vD), (ins memrr:$src),
|
||||
|
|
|
@ -2868,12 +2868,12 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
(outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
|
||||
"xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP,
|
||||
[(set v4i32: $XT,
|
||||
(int_ppc_vsx_xvtstdcsp v4f32:$XB, timm:$DCMX))]>;
|
||||
(int_ppc_vsx_xvtstdcsp v4f32:$XB, imm:$DCMX))]>;
|
||||
def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5,
|
||||
(outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
|
||||
"xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP,
|
||||
[(set v2i64: $XT,
|
||||
(int_ppc_vsx_xvtstdcdp v2f64:$XB, timm:$DCMX))]>;
|
||||
(int_ppc_vsx_xvtstdcdp v2f64:$XB, imm:$DCMX))]>;
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
|
||||
|
|
|
@ -214,12 +214,12 @@ class PseudoMaskedAMOUMinUMax
|
|||
}
|
||||
|
||||
class PseudoMaskedAMOPat<Intrinsic intrin, Pseudo AMOInst>
|
||||
: Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering),
|
||||
: Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering),
|
||||
(AMOInst GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering)>;
|
||||
|
||||
class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst>
|
||||
: Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
|
||||
timm:$ordering),
|
||||
imm:$ordering),
|
||||
(AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
|
||||
imm:$ordering)>;
|
||||
|
||||
|
@ -288,7 +288,7 @@ def PseudoMaskedCmpXchg32
|
|||
}
|
||||
|
||||
def : Pat<(int_riscv_masked_cmpxchg_i32
|
||||
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
|
||||
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering),
|
||||
(PseudoMaskedCmpXchg32
|
||||
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>;
|
||||
|
||||
|
@ -365,7 +365,7 @@ def PseudoCmpXchg64 : PseudoCmpXchg;
|
|||
defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64>;
|
||||
|
||||
def : Pat<(int_riscv_masked_cmpxchg_i64
|
||||
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
|
||||
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering),
|
||||
(PseudoMaskedCmpXchg32
|
||||
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>;
|
||||
} // Predicates = [HasStdExtA, IsRV64]
|
||||
|
|
|
@ -1146,7 +1146,7 @@ void SystemZDAGToDAGISel::loadVectorConstant(
|
|||
SDLoc DL(Node);
|
||||
SmallVector<SDValue, 2> Ops;
|
||||
for (unsigned OpVal : VCI.OpVals)
|
||||
Ops.push_back(CurDAG->getTargetConstant(OpVal, DL, MVT::i32));
|
||||
Ops.push_back(CurDAG->getConstant(OpVal, DL, MVT::i32));
|
||||
SDValue Op = CurDAG->getNode(VCI.Opcode, DL, VCI.VecVT, Ops);
|
||||
|
||||
if (VCI.VecVT == VT.getSimpleVT())
|
||||
|
@ -1550,8 +1550,8 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
|
|||
uint64_t ConstCCMask =
|
||||
cast<ConstantSDNode>(CCMask.getNode())->getZExtValue();
|
||||
// Invert the condition.
|
||||
CCMask = CurDAG->getTargetConstant(ConstCCValid ^ ConstCCMask,
|
||||
SDLoc(Node), CCMask.getValueType());
|
||||
CCMask = CurDAG->getConstant(ConstCCValid ^ ConstCCMask, SDLoc(Node),
|
||||
CCMask.getValueType());
|
||||
SDValue Op4 = Node->getOperand(4);
|
||||
SDNode *UpdatedNode =
|
||||
CurDAG->UpdateNodeOperands(Node, Op1, Op0, CCValid, CCMask, Op4);
|
||||
|
|
|
@ -2549,12 +2549,12 @@ static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
|
|||
}
|
||||
if (C.Opcode == SystemZISD::ICMP)
|
||||
return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
|
||||
DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
|
||||
DAG.getConstant(C.ICmpType, DL, MVT::i32));
|
||||
if (C.Opcode == SystemZISD::TM) {
|
||||
bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
|
||||
bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
|
||||
return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
|
||||
DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
|
||||
DAG.getConstant(RegisterOnly, DL, MVT::i32));
|
||||
}
|
||||
return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
|
||||
}
|
||||
|
@ -2592,10 +2592,10 @@ static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
|
|||
// in CCValid, so other values can be ignored.
|
||||
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
|
||||
unsigned CCValid, unsigned CCMask) {
|
||||
SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
|
||||
DAG.getConstant(0, DL, MVT::i32),
|
||||
DAG.getTargetConstant(CCValid, DL, MVT::i32),
|
||||
DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
|
||||
SDValue Ops[] = { DAG.getConstant(1, DL, MVT::i32),
|
||||
DAG.getConstant(0, DL, MVT::i32),
|
||||
DAG.getConstant(CCValid, DL, MVT::i32),
|
||||
DAG.getConstant(CCMask, DL, MVT::i32), CCReg };
|
||||
return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
|
||||
}
|
||||
|
||||
|
@ -2757,10 +2757,9 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
|
|||
|
||||
Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
|
||||
SDValue CCReg = emitCmp(DAG, DL, C);
|
||||
return DAG.getNode(
|
||||
SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
|
||||
DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
|
||||
DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
|
||||
return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
|
||||
Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32),
|
||||
DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
|
||||
}
|
||||
|
||||
// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
|
||||
|
@ -2811,9 +2810,8 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
|
|||
}
|
||||
|
||||
SDValue CCReg = emitCmp(DAG, DL, C);
|
||||
SDValue Ops[] = {TrueOp, FalseOp,
|
||||
DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
|
||||
DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
|
||||
SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32),
|
||||
DAG.getConstant(C.CCMask, DL, MVT::i32), CCReg};
|
||||
|
||||
return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
|
||||
}
|
||||
|
@ -3900,8 +3898,11 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
|
|||
bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
|
||||
unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
|
||||
auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
|
||||
SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
|
||||
Op.getOperand(1)};
|
||||
SDValue Ops[] = {
|
||||
Op.getOperand(0),
|
||||
DAG.getConstant(Code, DL, MVT::i32),
|
||||
Op.getOperand(1)
|
||||
};
|
||||
return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
|
||||
Node->getVTList(), Ops,
|
||||
Node->getMemoryVT(), Node->getMemOperand());
|
||||
|
@ -4243,7 +4244,7 @@ static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
|
|||
Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
|
||||
SDValue Op;
|
||||
if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
|
||||
SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
|
||||
SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32);
|
||||
Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
|
||||
} else if (P.Opcode == SystemZISD::PACK) {
|
||||
MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
|
||||
|
@ -4268,8 +4269,7 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
|
|||
unsigned StartIndex, OpNo0, OpNo1;
|
||||
if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
|
||||
return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
|
||||
Ops[OpNo1],
|
||||
DAG.getTargetConstant(StartIndex, DL, MVT::i32));
|
||||
Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32));
|
||||
|
||||
// Fall back on VPERM. Construct an SDNode for the permute vector.
|
||||
SDValue IndexNodes[SystemZ::VectorBytes];
|
||||
|
@ -4767,7 +4767,7 @@ SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
|
|||
return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
|
||||
// Otherwise keep it as a vector-to-vector operation.
|
||||
return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
|
||||
DAG.getTargetConstant(Index, DL, MVT::i32));
|
||||
DAG.getConstant(Index, DL, MVT::i32));
|
||||
}
|
||||
|
||||
GeneralShuffle GS(VT);
|
||||
|
@ -6057,8 +6057,8 @@ SDValue SystemZTargetLowering::combineBR_CCMASK(
|
|||
if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
|
||||
return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
|
||||
Chain,
|
||||
DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
|
||||
DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
|
||||
DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32),
|
||||
DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32),
|
||||
N->getOperand(3), CCReg);
|
||||
return SDValue();
|
||||
}
|
||||
|
@ -6079,9 +6079,10 @@ SDValue SystemZTargetLowering::combineSELECT_CCMASK(
|
|||
|
||||
if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
|
||||
return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
|
||||
N->getOperand(0), N->getOperand(1),
|
||||
DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
|
||||
DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
|
||||
N->getOperand(0),
|
||||
N->getOperand(1),
|
||||
DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32),
|
||||
DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32),
|
||||
CCReg);
|
||||
return SDValue();
|
||||
}
|
||||
|
|
|
@ -2141,17 +2141,17 @@ class FixedCondBranchRXY<CondVariant V, string mnemonic, bits<16> opcode,
|
|||
}
|
||||
|
||||
class CmpBranchRIEa<string mnemonic, bits<16> opcode,
|
||||
RegisterOperand cls, ImmOpWithPattern imm>
|
||||
RegisterOperand cls, Immediate imm>
|
||||
: InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2, cond4:$M3),
|
||||
mnemonic#"$M3\t$R1, $I2", []>;
|
||||
|
||||
class AsmCmpBranchRIEa<string mnemonic, bits<16> opcode,
|
||||
RegisterOperand cls, ImmOpWithPattern imm>
|
||||
RegisterOperand cls, Immediate imm>
|
||||
: InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2, imm32zx4:$M3),
|
||||
mnemonic#"\t$R1, $I2, $M3", []>;
|
||||
|
||||
class FixedCmpBranchRIEa<CondVariant V, string mnemonic, bits<16> opcode,
|
||||
RegisterOperand cls, ImmOpWithPattern imm>
|
||||
RegisterOperand cls, Immediate imm>
|
||||
: InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2),
|
||||
mnemonic#V.suffix#"\t$R1, $I2", []> {
|
||||
let isAsmParserOnly = V.alternate;
|
||||
|
@ -2159,7 +2159,7 @@ class FixedCmpBranchRIEa<CondVariant V, string mnemonic, bits<16> opcode,
|
|||
}
|
||||
|
||||
multiclass CmpBranchRIEaPair<string mnemonic, bits<16> opcode,
|
||||
RegisterOperand cls, ImmOpWithPattern imm> {
|
||||
RegisterOperand cls, Immediate imm> {
|
||||
let isCodeGenOnly = 1 in
|
||||
def "" : CmpBranchRIEa<mnemonic, opcode, cls, imm>;
|
||||
def Asm : AsmCmpBranchRIEa<mnemonic, opcode, cls, imm>;
|
||||
|
@ -2193,19 +2193,19 @@ multiclass CmpBranchRIEbPair<string mnemonic, bits<16> opcode,
|
|||
}
|
||||
|
||||
class CmpBranchRIEc<string mnemonic, bits<16> opcode,
|
||||
RegisterOperand cls, ImmOpWithPattern imm>
|
||||
RegisterOperand cls, Immediate imm>
|
||||
: InstRIEc<opcode, (outs),
|
||||
(ins cls:$R1, imm:$I2, cond4:$M3, brtarget16:$RI4),
|
||||
mnemonic#"$M3\t$R1, $I2, $RI4", []>;
|
||||
|
||||
class AsmCmpBranchRIEc<string mnemonic, bits<16> opcode,
|
||||
RegisterOperand cls, ImmOpWithPattern imm>
|
||||
RegisterOperand cls, Immediate imm>
|
||||
: InstRIEc<opcode, (outs),
|
||||
(ins cls:$R1, imm:$I2, imm32zx4:$M3, brtarget16:$RI4),
|
||||
mnemonic#"\t$R1, $I2, $M3, $RI4", []>;
|
||||
|
||||
class FixedCmpBranchRIEc<CondVariant V, string mnemonic, bits<16> opcode,
|
||||
RegisterOperand cls, ImmOpWithPattern imm>
|
||||
RegisterOperand cls, Immediate imm>
|
||||
: InstRIEc<opcode, (outs), (ins cls:$R1, imm:$I2, brtarget16:$RI4),
|
||||
mnemonic#V.suffix#"\t$R1, $I2, $RI4", []> {
|
||||
let isAsmParserOnly = V.alternate;
|
||||
|
@ -2213,7 +2213,7 @@ class FixedCmpBranchRIEc<CondVariant V, string mnemonic, bits<16> opcode,
|
|||
}
|
||||
|
||||
multiclass CmpBranchRIEcPair<string mnemonic, bits<16> opcode,
|
||||
RegisterOperand cls, ImmOpWithPattern imm> {
|
||||
RegisterOperand cls, Immediate imm> {
|
||||
let isCodeGenOnly = 1 in
|
||||
def "" : CmpBranchRIEc<mnemonic, opcode, cls, imm>;
|
||||
def Asm : AsmCmpBranchRIEc<mnemonic, opcode, cls, imm>;
|
||||
|
@ -2272,19 +2272,19 @@ multiclass CmpBranchRRSPair<string mnemonic, bits<16> opcode,
|
|||
}
|
||||
|
||||
class CmpBranchRIS<string mnemonic, bits<16> opcode,
|
||||
RegisterOperand cls, ImmOpWithPattern imm>
|
||||
RegisterOperand cls, Immediate imm>
|
||||
: InstRIS<opcode, (outs),
|
||||
(ins cls:$R1, imm:$I2, cond4:$M3, bdaddr12only:$BD4),
|
||||
mnemonic#"$M3\t$R1, $I2, $BD4", []>;
|
||||
|
||||
class AsmCmpBranchRIS<string mnemonic, bits<16> opcode,
|
||||
RegisterOperand cls, ImmOpWithPattern imm>
|
||||
RegisterOperand cls, Immediate imm>
|
||||
: InstRIS<opcode, (outs),
|
||||
(ins cls:$R1, imm:$I2, imm32zx4:$M3, bdaddr12only:$BD4),
|
||||
mnemonic#"\t$R1, $I2, $M3, $BD4", []>;
|
||||
|
||||
class FixedCmpBranchRIS<CondVariant V, string mnemonic, bits<16> opcode,
|
||||
RegisterOperand cls, ImmOpWithPattern imm>
|
||||
RegisterOperand cls, Immediate imm>
|
||||
: InstRIS<opcode, (outs), (ins cls:$R1, imm:$I2, bdaddr12only:$BD4),
|
||||
mnemonic#V.suffix#"\t$R1, $I2, $BD4", []> {
|
||||
let isAsmParserOnly = V.alternate;
|
||||
|
@ -2292,7 +2292,7 @@ class FixedCmpBranchRIS<CondVariant V, string mnemonic, bits<16> opcode,
|
|||
}
|
||||
|
||||
multiclass CmpBranchRISPair<string mnemonic, bits<16> opcode,
|
||||
RegisterOperand cls, ImmOpWithPattern imm> {
|
||||
RegisterOperand cls, Immediate imm> {
|
||||
let isCodeGenOnly = 1 in
|
||||
def "" : CmpBranchRIS<mnemonic, opcode, cls, imm>;
|
||||
def Asm : AsmCmpBranchRIS<mnemonic, opcode, cls, imm>;
|
||||
|
@ -2585,7 +2585,7 @@ multiclass StoreMultipleVRSaAlign<string mnemonic, bits<16> opcode> {
|
|||
// We therefore match the address in the same way as a normal store and
|
||||
// only use the StoreSI* instruction if the matched address is suitable.
|
||||
class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
|
||||
ImmOpWithPattern imm>
|
||||
Immediate imm>
|
||||
: InstSI<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2),
|
||||
mnemonic#"\t$BD1, $I2",
|
||||
[(operator imm:$I2, mviaddr12pair:$BD1)]> {
|
||||
|
@ -2593,7 +2593,7 @@ class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
|
|||
}
|
||||
|
||||
class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
|
||||
ImmOpWithPattern imm>
|
||||
Immediate imm>
|
||||
: InstSIY<opcode, (outs), (ins mviaddr20pair:$BD1, imm:$I2),
|
||||
mnemonic#"\t$BD1, $I2",
|
||||
[(operator imm:$I2, mviaddr20pair:$BD1)]> {
|
||||
|
@ -2601,7 +2601,7 @@ class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
|
|||
}
|
||||
|
||||
class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
|
||||
ImmOpWithPattern imm>
|
||||
Immediate imm>
|
||||
: InstSIL<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2),
|
||||
mnemonic#"\t$BD1, $I2",
|
||||
[(operator imm:$I2, mviaddr12pair:$BD1)]> {
|
||||
|
@ -2609,7 +2609,7 @@ class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
|
|||
}
|
||||
|
||||
multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
|
||||
SDPatternOperator operator, ImmOpWithPattern imm> {
|
||||
SDPatternOperator operator, Immediate imm> {
|
||||
let DispKey = mnemonic in {
|
||||
let DispSize = "12" in
|
||||
def "" : StoreSI<mnemonic, siOpcode, operator, imm>;
|
||||
|
@ -2665,7 +2665,7 @@ multiclass CondStoreRSYPair<string mnemonic, bits<16> opcode,
|
|||
def Asm : AsmCondStoreRSY<mnemonic, opcode, cls, bytes, mode>;
|
||||
}
|
||||
|
||||
class SideEffectUnaryI<string mnemonic, bits<8> opcode, ImmOpWithPattern imm>
|
||||
class SideEffectUnaryI<string mnemonic, bits<8> opcode, Immediate imm>
|
||||
: InstI<opcode, (outs), (ins imm:$I1),
|
||||
mnemonic#"\t$I1", []>;
|
||||
|
||||
|
@ -2761,13 +2761,13 @@ class UnaryMemRRFc<string mnemonic, bits<16> opcode,
|
|||
}
|
||||
|
||||
class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
|
||||
RegisterOperand cls, ImmOpWithPattern imm>
|
||||
RegisterOperand cls, Immediate imm>
|
||||
: InstRIa<opcode, (outs cls:$R1), (ins imm:$I2),
|
||||
mnemonic#"\t$R1, $I2",
|
||||
[(set cls:$R1, (operator imm:$I2))]>;
|
||||
|
||||
class UnaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
|
||||
RegisterOperand cls, ImmOpWithPattern imm>
|
||||
RegisterOperand cls, Immediate imm>
|
||||
: InstRILa<opcode, (outs cls:$R1), (ins imm:$I2),
|
||||
mnemonic#"\t$R1, $I2",
|
||||
[(set cls:$R1, (operator imm:$I2))]>;
|
||||
|
@ -2885,14 +2885,14 @@ multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
|
|||
}
|
||||
|
||||
class UnaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
|
||||
TypedReg tr, ImmOpWithPattern imm, bits<4> type = 0>
|
||||
TypedReg tr, Immediate imm, bits<4> type = 0>
|
||||
: InstVRIa<opcode, (outs tr.op:$V1), (ins imm:$I2),
|
||||
mnemonic#"\t$V1, $I2",
|
||||
[(set (tr.vt tr.op:$V1), (operator (i32 timm:$I2)))]> {
|
||||
[(set (tr.vt tr.op:$V1), (operator imm:$I2))]> {
|
||||
let M3 = type;
|
||||
}
|
||||
|
||||
class UnaryVRIaGeneric<string mnemonic, bits<16> opcode, ImmOpWithPattern imm>
|
||||
class UnaryVRIaGeneric<string mnemonic, bits<16> opcode, Immediate imm>
|
||||
: InstVRIa<opcode, (outs VR128:$V1), (ins imm:$I2, imm32zx4:$M3),
|
||||
mnemonic#"\t$V1, $I2, $M3", []>;
|
||||
|
||||
|
@ -3021,7 +3021,7 @@ class SideEffectBinaryRRFc<string mnemonic, bits<16> opcode,
|
|||
}
|
||||
|
||||
class SideEffectBinaryIE<string mnemonic, bits<16> opcode,
|
||||
ImmOpWithPattern imm1, ImmOpWithPattern imm2>
|
||||
Immediate imm1, Immediate imm2>
|
||||
: InstIE<opcode, (outs), (ins imm1:$I1, imm2:$I2),
|
||||
mnemonic#"\t$I1, $I2", []>;
|
||||
|
||||
|
@ -3030,7 +3030,7 @@ class SideEffectBinarySI<string mnemonic, bits<8> opcode, Operand imm>
|
|||
mnemonic#"\t$BD1, $I2", []>;
|
||||
|
||||
class SideEffectBinarySIL<string mnemonic, bits<16> opcode,
|
||||
SDPatternOperator operator, ImmOpWithPattern imm>
|
||||
SDPatternOperator operator, Immediate imm>
|
||||
: InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
|
||||
mnemonic#"\t$BD1, $I2", [(operator bdaddr12only:$BD1, imm:$I2)]>;
|
||||
|
||||
|
@ -3165,7 +3165,7 @@ class BinaryRRFc<string mnemonic, bits<16> opcode,
|
|||
mnemonic#"\t$R1, $R2, $M3", []>;
|
||||
|
||||
class BinaryMemRRFc<string mnemonic, bits<16> opcode,
|
||||
RegisterOperand cls1, RegisterOperand cls2, ImmOpWithPattern imm>
|
||||
RegisterOperand cls1, RegisterOperand cls2, Immediate imm>
|
||||
: InstRRFc<opcode, (outs cls2:$R2, cls1:$R1), (ins cls1:$R1src, imm:$M3),
|
||||
mnemonic#"\t$R1, $R2, $M3", []> {
|
||||
let Constraints = "$R1 = $R1src";
|
||||
|
@ -3267,7 +3267,7 @@ multiclass CondBinaryRRFaPair<string mnemonic, bits<16> opcode,
|
|||
}
|
||||
|
||||
class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
|
||||
RegisterOperand cls, ImmOpWithPattern imm>
|
||||
RegisterOperand cls, Immediate imm>
|
||||
: InstRIa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
|
||||
mnemonic#"\t$R1, $I2",
|
||||
[(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
|
||||
|
@ -3276,14 +3276,14 @@ class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
|
|||
}
|
||||
|
||||
class BinaryRIE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
|
||||
RegisterOperand cls, ImmOpWithPattern imm>
|
||||
RegisterOperand cls, Immediate imm>
|
||||
: InstRIEd<opcode, (outs cls:$R1), (ins cls:$R3, imm:$I2),
|
||||
mnemonic#"\t$R1, $R3, $I2",
|
||||
[(set cls:$R1, (operator cls:$R3, imm:$I2))]>;
|
||||
|
||||
multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2,
|
||||
SDPatternOperator operator, RegisterOperand cls,
|
||||
ImmOpWithPattern imm> {
|
||||
Immediate imm> {
|
||||
let NumOpsKey = mnemonic in {
|
||||
let NumOpsValue = "3" in
|
||||
def K : BinaryRIE<mnemonic##"k", opcode2, operator, cls, imm>,
|
||||
|
@ -3294,7 +3294,7 @@ multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2,
|
|||
}
|
||||
|
||||
class CondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
|
||||
ImmOpWithPattern imm>
|
||||
Immediate imm>
|
||||
: InstRIEg<opcode, (outs cls:$R1),
|
||||
(ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3),
|
||||
mnemonic#"$M3\t$R1, $I2",
|
||||
|
@ -3308,7 +3308,7 @@ class CondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
|
|||
// Like CondBinaryRIE, but used for the raw assembly form. The condition-code
|
||||
// mask is the third operand rather than being part of the mnemonic.
|
||||
class AsmCondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
|
||||
ImmOpWithPattern imm>
|
||||
Immediate imm>
|
||||
: InstRIEg<opcode, (outs cls:$R1),
|
||||
(ins cls:$R1src, imm:$I2, imm32zx4:$M3),
|
||||
mnemonic#"\t$R1, $I2, $M3", []> {
|
||||
|
@ -3318,7 +3318,7 @@ class AsmCondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
|
|||
|
||||
// Like CondBinaryRIE, but with a fixed CC mask.
|
||||
class FixedCondBinaryRIE<CondVariant V, string mnemonic, bits<16> opcode,
|
||||
RegisterOperand cls, ImmOpWithPattern imm>
|
||||
RegisterOperand cls, Immediate imm>
|
||||
: InstRIEg<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
|
||||
mnemonic#V.suffix#"\t$R1, $I2", []> {
|
||||
let Constraints = "$R1 = $R1src";
|
||||
|
@ -3328,14 +3328,14 @@ class FixedCondBinaryRIE<CondVariant V, string mnemonic, bits<16> opcode,
|
|||
}
|
||||
|
||||
multiclass CondBinaryRIEPair<string mnemonic, bits<16> opcode,
|
||||
RegisterOperand cls, ImmOpWithPattern imm> {
|
||||
RegisterOperand cls, Immediate imm> {
|
||||
let isCodeGenOnly = 1 in
|
||||
def "" : CondBinaryRIE<mnemonic, opcode, cls, imm>;
|
||||
def Asm : AsmCondBinaryRIE<mnemonic, opcode, cls, imm>;
|
||||
}
|
||||
|
||||
class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
|
||||
RegisterOperand cls, ImmOpWithPattern imm>
|
||||
RegisterOperand cls, Immediate imm>
|
||||
: InstRILa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
|
||||
mnemonic#"\t$R1, $I2",
|
||||
[(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
|
||||
|
@ -3484,7 +3484,7 @@ class BinaryVRIb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
|
|||
TypedReg tr, bits<4> type>
|
||||
: InstVRIb<opcode, (outs tr.op:$V1), (ins imm32zx8:$I2, imm32zx8:$I3),
|
||||
mnemonic#"\t$V1, $I2, $I3",
|
||||
[(set (tr.vt tr.op:$V1), (operator imm32zx8_timm:$I2, imm32zx8_timm:$I3))]> {
|
||||
[(set (tr.vt tr.op:$V1), (operator imm32zx8:$I2, imm32zx8:$I3))]> {
|
||||
let M4 = type;
|
||||
}
|
||||
|
||||
|
@ -3498,7 +3498,7 @@ class BinaryVRIc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
|
|||
: InstVRIc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V3, imm32zx16:$I2),
|
||||
mnemonic#"\t$V1, $V3, $I2",
|
||||
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V3),
|
||||
imm32zx16_timm:$I2))]> {
|
||||
imm32zx16:$I2))]> {
|
||||
let M4 = type;
|
||||
}
|
||||
|
||||
|
@ -3512,7 +3512,7 @@ class BinaryVRIe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
|
|||
: InstVRIe<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, imm32zx12:$I3),
|
||||
mnemonic#"\t$V1, $V2, $I3",
|
||||
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2),
|
||||
imm32zx12_timm:$I3))]> {
|
||||
imm32zx12:$I3))]> {
|
||||
let M4 = type;
|
||||
let M5 = m5;
|
||||
}
|
||||
|
@ -3715,7 +3715,7 @@ class BinaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
|
|||
: InstVRX<opcode, (outs VR128:$V1), (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
|
||||
mnemonic#"\t$V1, $XBD2, $M3",
|
||||
[(set (tr.vt tr.op:$V1), (operator bdxaddr12only:$XBD2,
|
||||
imm32zx4_timm:$M3))]> {
|
||||
imm32zx4:$M3))]> {
|
||||
let mayLoad = 1;
|
||||
let AccessBytes = bytes;
|
||||
}
|
||||
|
@ -3765,7 +3765,7 @@ class BinaryVSI<string mnemonic, bits<16> opcode, SDPatternOperator operator,
|
|||
}
|
||||
|
||||
class StoreBinaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
|
||||
ImmOpWithPattern index>
|
||||
Immediate index>
|
||||
: InstVRV<opcode, (outs), (ins VR128:$V1, bdvaddr12only:$VBD2, index:$M3),
|
||||
mnemonic#"\t$V1, $VBD2, $M3", []> {
|
||||
let mayStore = 1;
|
||||
|
@ -3774,7 +3774,7 @@ class StoreBinaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
|
|||
|
||||
class StoreBinaryVRX<string mnemonic, bits<16> opcode,
|
||||
SDPatternOperator operator, TypedReg tr, bits<5> bytes,
|
||||
ImmOpWithPattern index>
|
||||
Immediate index>
|
||||
: InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2, index:$M3),
|
||||
mnemonic#"\t$V1, $XBD2, $M3",
|
||||
[(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2, index:$M3)]> {
|
||||
|
@ -3809,7 +3809,7 @@ class CompareRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
|
|||
}
|
||||
|
||||
class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
|
||||
RegisterOperand cls, ImmOpWithPattern imm>
|
||||
RegisterOperand cls, Immediate imm>
|
||||
: InstRIa<opcode, (outs), (ins cls:$R1, imm:$I2),
|
||||
mnemonic#"\t$R1, $I2",
|
||||
[(set CC, (operator cls:$R1, imm:$I2))]> {
|
||||
|
@ -3817,7 +3817,7 @@ class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
|
|||
}
|
||||
|
||||
class CompareRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
|
||||
RegisterOperand cls, ImmOpWithPattern imm>
|
||||
RegisterOperand cls, Immediate imm>
|
||||
: InstRILa<opcode, (outs), (ins cls:$R1, imm:$I2),
|
||||
mnemonic#"\t$R1, $I2",
|
||||
[(set CC, (operator cls:$R1, imm:$I2))]> {
|
||||
|
@ -3924,7 +3924,7 @@ class CompareSSb<string mnemonic, bits<8> opcode>
|
|||
}
|
||||
|
||||
class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
|
||||
SDPatternOperator load, ImmOpWithPattern imm,
|
||||
SDPatternOperator load, Immediate imm,
|
||||
AddressingMode mode = bdaddr12only>
|
||||
: InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2),
|
||||
mnemonic#"\t$BD1, $I2",
|
||||
|
@ -3934,7 +3934,7 @@ class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
|
|||
}
|
||||
|
||||
class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
|
||||
SDPatternOperator load, ImmOpWithPattern imm>
|
||||
SDPatternOperator load, Immediate imm>
|
||||
: InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
|
||||
mnemonic#"\t$BD1, $I2",
|
||||
[(set CC, (operator (load bdaddr12only:$BD1), imm:$I2))]> {
|
||||
|
@ -3943,7 +3943,7 @@ class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
|
|||
}
|
||||
|
||||
class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
|
||||
SDPatternOperator load, ImmOpWithPattern imm,
|
||||
SDPatternOperator load, Immediate imm,
|
||||
AddressingMode mode = bdaddr20only>
|
||||
: InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2),
|
||||
mnemonic#"\t$BD1, $I2",
|
||||
|
@ -3954,7 +3954,7 @@ class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
|
|||
|
||||
multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
|
||||
SDPatternOperator operator, SDPatternOperator load,
|
||||
ImmOpWithPattern imm> {
|
||||
Immediate imm> {
|
||||
let DispKey = mnemonic in {
|
||||
let DispSize = "12" in
|
||||
def "" : CompareSI<mnemonic, siOpcode, operator, load, imm, bdaddr12pair>;
|
||||
|
@ -4012,7 +4012,7 @@ class TestRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
|
|||
}
|
||||
|
||||
class TestBinarySIL<string mnemonic, bits<16> opcode,
|
||||
SDPatternOperator operator, ImmOpWithPattern imm>
|
||||
SDPatternOperator operator, Immediate imm>
|
||||
: InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
|
||||
mnemonic#"\t$BD1, $I2",
|
||||
[(set CC, (operator bdaddr12only:$BD1, imm:$I2))]>;
|
||||
|
@ -4073,7 +4073,7 @@ class SideEffectTernaryMemMemMemRRFb<string mnemonic, bits<16> opcode,
|
|||
|
||||
class SideEffectTernaryRRFc<string mnemonic, bits<16> opcode,
|
||||
RegisterOperand cls1, RegisterOperand cls2,
|
||||
ImmOpWithPattern imm>
|
||||
Immediate imm>
|
||||
: InstRRFc<opcode, (outs), (ins cls1:$R1, cls2:$R2, imm:$M3),
|
||||
mnemonic#"\t$R1, $R2, $M3", []>;
|
||||
|
||||
|
@ -4086,7 +4086,7 @@ multiclass SideEffectTernaryRRFcOpt<string mnemonic, bits<16> opcode,
|
|||
|
||||
class SideEffectTernaryMemMemRRFc<string mnemonic, bits<16> opcode,
|
||||
RegisterOperand cls1, RegisterOperand cls2,
|
||||
ImmOpWithPattern imm>
|
||||
Immediate imm>
|
||||
: InstRRFc<opcode, (outs cls1:$R1, cls2:$R2),
|
||||
(ins cls1:$R1src, cls2:$R2src, imm:$M3),
|
||||
mnemonic#"\t$R1, $R2, $M3", []> {
|
||||
|
@ -4221,7 +4221,7 @@ class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
|
|||
}
|
||||
|
||||
class TernaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
|
||||
TypedReg tr1, TypedReg tr2, ImmOpWithPattern imm, ImmOpWithPattern index>
|
||||
TypedReg tr1, TypedReg tr2, Immediate imm, Immediate index>
|
||||
: InstVRIa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V1src, imm:$I2, index:$M3),
|
||||
mnemonic#"\t$V1, $I2, $M3",
|
||||
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V1src),
|
||||
|
@ -4237,7 +4237,7 @@ class TernaryVRId<string mnemonic, bits<16> opcode, SDPatternOperator operator,
|
|||
mnemonic#"\t$V1, $V2, $V3, $I4",
|
||||
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2),
|
||||
(tr2.vt tr2.op:$V3),
|
||||
imm32zx8_timm:$I4))]> {
|
||||
imm32zx8:$I4))]> {
|
||||
let M5 = type;
|
||||
}
|
||||
|
||||
|
@ -4252,8 +4252,8 @@ class TernaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
|
|||
(ins tr2.op:$V2, imm32zx4:$M4, imm32zx4:$M5),
|
||||
mnemonic#"\t$V1, $V2, $M4, $M5",
|
||||
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2),
|
||||
imm32zx4_timm:$M4,
|
||||
imm32zx4_timm:$M5))],
|
||||
imm32zx4:$M4,
|
||||
imm32zx4:$M5))],
|
||||
m4or> {
|
||||
let M3 = type;
|
||||
}
|
||||
|
@ -4285,13 +4285,13 @@ multiclass TernaryOptVRRbSPair<string mnemonic, bits<16> opcode,
|
|||
TypedReg tr1, TypedReg tr2, bits<4> type,
|
||||
bits<4> modifier = 0> {
|
||||
def "" : TernaryVRRb<mnemonic, opcode, operator, tr1, tr2, type,
|
||||
imm32zx4even_timm, !and (modifier, 14)>;
|
||||
imm32zx4even, !and (modifier, 14)>;
|
||||
def : InstAlias<mnemonic#"\t$V1, $V2, $V3",
|
||||
(!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
|
||||
tr2.op:$V3, 0)>;
|
||||
let Defs = [CC] in
|
||||
def S : TernaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
|
||||
imm32zx4even_timm, !add(!and (modifier, 14), 1)>;
|
||||
imm32zx4even, !add(!and (modifier, 14), 1)>;
|
||||
def : InstAlias<mnemonic#"s\t$V1, $V2, $V3",
|
||||
(!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2,
|
||||
tr2.op:$V3, 0)>;
|
||||
|
@ -4314,7 +4314,7 @@ class TernaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
|
|||
mnemonic#"\t$V1, $V2, $V3, $M4",
|
||||
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2),
|
||||
(tr2.vt tr2.op:$V3),
|
||||
imm32zx4_timm:$M4))]> {
|
||||
imm32zx4:$M4))]> {
|
||||
let M5 = 0;
|
||||
let M6 = 0;
|
||||
}
|
||||
|
@ -4327,7 +4327,7 @@ class TernaryVRRcFloat<string mnemonic, bits<16> opcode,
|
|||
mnemonic#"\t$V1, $V2, $V3, $M6",
|
||||
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2),
|
||||
(tr2.vt tr2.op:$V3),
|
||||
imm32zx4_timm:$M6))]> {
|
||||
imm32zx4:$M6))]> {
|
||||
let M4 = type;
|
||||
let M5 = m5;
|
||||
}
|
||||
|
@ -4429,7 +4429,7 @@ class TernaryVRSbGeneric<string mnemonic, bits<16> opcode>
|
|||
}
|
||||
|
||||
class TernaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
|
||||
ImmOpWithPattern index>
|
||||
Immediate index>
|
||||
: InstVRV<opcode, (outs VR128:$V1),
|
||||
(ins VR128:$V1src, bdvaddr12only:$VBD2, index:$M3),
|
||||
mnemonic#"\t$V1, $VBD2, $M3", []> {
|
||||
|
@ -4440,7 +4440,7 @@ class TernaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
|
|||
}
|
||||
|
||||
class TernaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
|
||||
TypedReg tr1, TypedReg tr2, bits<5> bytes, ImmOpWithPattern index>
|
||||
TypedReg tr1, TypedReg tr2, bits<5> bytes, Immediate index>
|
||||
: InstVRX<opcode, (outs tr1.op:$V1),
|
||||
(ins tr2.op:$V1src, bdxaddr12only:$XBD2, index:$M3),
|
||||
mnemonic#"\t$V1, $XBD2, $M3",
|
||||
|
@ -4461,7 +4461,7 @@ class QuaternaryVRId<string mnemonic, bits<16> opcode, SDPatternOperator operato
|
|||
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V1src),
|
||||
(tr2.vt tr2.op:$V2),
|
||||
(tr2.vt tr2.op:$V3),
|
||||
imm32zx8_timm:$I4))]> {
|
||||
imm32zx8:$I4))]> {
|
||||
let Constraints = "$V1 = $V1src";
|
||||
let DisableEncoding = "$V1src";
|
||||
let M5 = type;
|
||||
|
@ -4480,7 +4480,7 @@ class QuaternaryVRIf<string mnemonic, bits<16> opcode>
|
|||
: InstVRIf<opcode, (outs VR128:$V1),
|
||||
(ins VR128:$V2, VR128:$V3,
|
||||
imm32zx8:$I4, imm32zx4:$M5),
|
||||
mnemonic#"\t$V1, $V2, $V3, $I4, $M5", []>;
|
||||
mnemonic#"\t$V1, $V2, $V3, $I4, $M5", []>;
|
||||
|
||||
class QuaternaryVRIg<string mnemonic, bits<16> opcode>
|
||||
: InstVRIg<opcode, (outs VR128:$V1),
|
||||
|
@ -4491,7 +4491,7 @@ class QuaternaryVRIg<string mnemonic, bits<16> opcode>
|
|||
class QuaternaryVRRd<string mnemonic, bits<16> opcode,
|
||||
SDPatternOperator operator, TypedReg tr1, TypedReg tr2,
|
||||
TypedReg tr3, TypedReg tr4, bits<4> type,
|
||||
SDPatternOperator m6mask = imm32zx4_timm, bits<4> m6or = 0>
|
||||
SDPatternOperator m6mask = imm32zx4, bits<4> m6or = 0>
|
||||
: InstVRRd<opcode, (outs tr1.op:$V1),
|
||||
(ins tr2.op:$V2, tr3.op:$V3, tr4.op:$V4, m6mask:$M6),
|
||||
mnemonic#"\t$V1, $V2, $V3, $V4, $M6",
|
||||
|
@ -4518,14 +4518,14 @@ multiclass QuaternaryOptVRRdSPair<string mnemonic, bits<16> opcode,
|
|||
bits<4> modifier = 0> {
|
||||
def "" : QuaternaryVRRd<mnemonic, opcode, operator,
|
||||
tr1, tr2, tr2, tr2, type,
|
||||
imm32zx4even_timm, !and (modifier, 14)>;
|
||||
imm32zx4even, !and (modifier, 14)>;
|
||||
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4",
|
||||
(!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
|
||||
tr2.op:$V3, tr2.op:$V4, 0)>;
|
||||
let Defs = [CC] in
|
||||
def S : QuaternaryVRRd<mnemonic##"s", opcode, operator_cc,
|
||||
tr1, tr2, tr2, tr2, type,
|
||||
imm32zx4even_timm, !add (!and (modifier, 14), 1)>;
|
||||
imm32zx4even, !add (!and (modifier, 14), 1)>;
|
||||
def : InstAlias<mnemonic#"s\t$V1, $V2, $V3, $V4",
|
||||
(!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2,
|
||||
tr2.op:$V3, tr2.op:$V4, 0)>;
|
||||
|
@ -4536,7 +4536,7 @@ multiclass QuaternaryOptVRRdSPairGeneric<string mnemonic, bits<16> opcode> {
|
|||
def "" : QuaternaryVRRdGeneric<mnemonic, opcode>;
|
||||
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4, $M5",
|
||||
(!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
|
||||
VR128:$V4, imm32zx4_timm:$M5, 0)>;
|
||||
VR128:$V4, imm32zx4:$M5, 0)>;
|
||||
}
|
||||
|
||||
class SideEffectQuaternaryRRFa<string mnemonic, bits<16> opcode,
|
||||
|
@ -4638,13 +4638,13 @@ class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1,
|
|||
class PrefetchRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator>
|
||||
: InstRXYb<opcode, (outs), (ins imm32zx4:$M1, bdxaddr20only:$XBD2),
|
||||
mnemonic##"\t$M1, $XBD2",
|
||||
[(operator imm32zx4_timm:$M1, bdxaddr20only:$XBD2)]>;
|
||||
[(operator imm32zx4:$M1, bdxaddr20only:$XBD2)]>;
|
||||
|
||||
class PrefetchRILPC<string mnemonic, bits<12> opcode,
|
||||
SDPatternOperator operator>
|
||||
: InstRILc<opcode, (outs), (ins imm32zx4_timm:$M1, pcrel32:$RI2),
|
||||
: InstRILc<opcode, (outs), (ins imm32zx4:$M1, pcrel32:$RI2),
|
||||
mnemonic##"\t$M1, $RI2",
|
||||
[(operator imm32zx4_timm:$M1, pcrel32:$RI2)]> {
|
||||
[(operator imm32zx4:$M1, pcrel32:$RI2)]> {
|
||||
// We want PC-relative addresses to be tried ahead of BD and BDX addresses.
|
||||
// However, BDXs have two extra operands and are therefore 6 units more
|
||||
// complex.
|
||||
|
@ -4691,7 +4691,7 @@ class Pseudo<dag outs, dag ins, list<dag> pattern>
|
|||
|
||||
// Like UnaryRI, but expanded after RA depending on the choice of register.
|
||||
class UnaryRIPseudo<SDPatternOperator operator, RegisterOperand cls,
|
||||
ImmOpWithPattern imm>
|
||||
Immediate imm>
|
||||
: Pseudo<(outs cls:$R1), (ins imm:$I2),
|
||||
[(set cls:$R1, (operator imm:$I2))]>;
|
||||
|
||||
|
@ -4720,7 +4720,7 @@ class UnaryRRPseudo<string key, SDPatternOperator operator,
|
|||
|
||||
// Like BinaryRI, but expanded after RA depending on the choice of register.
|
||||
class BinaryRIPseudo<SDPatternOperator operator, RegisterOperand cls,
|
||||
ImmOpWithPattern imm>
|
||||
Immediate imm>
|
||||
: Pseudo<(outs cls:$R1), (ins cls:$R1src, imm:$I2),
|
||||
[(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
|
||||
let Constraints = "$R1 = $R1src";
|
||||
|
@ -4728,13 +4728,13 @@ class BinaryRIPseudo<SDPatternOperator operator, RegisterOperand cls,
|
|||
|
||||
// Like BinaryRIE, but expanded after RA depending on the choice of register.
|
||||
class BinaryRIEPseudo<SDPatternOperator operator, RegisterOperand cls,
|
||||
ImmOpWithPattern imm>
|
||||
Immediate imm>
|
||||
: Pseudo<(outs cls:$R1), (ins cls:$R3, imm:$I2),
|
||||
[(set cls:$R1, (operator cls:$R3, imm:$I2))]>;
|
||||
|
||||
// Like BinaryRIAndK, but expanded after RA depending on the choice of register.
|
||||
multiclass BinaryRIAndKPseudo<string key, SDPatternOperator operator,
|
||||
RegisterOperand cls, ImmOpWithPattern imm> {
|
||||
RegisterOperand cls, Immediate imm> {
|
||||
let NumOpsKey = key in {
|
||||
let NumOpsValue = "3" in
|
||||
def K : BinaryRIEPseudo<operator, cls, imm>,
|
||||
|
@ -4764,7 +4764,7 @@ class MemFoldPseudo<string mnemonic, RegisterOperand cls, bits<5> bytes,
|
|||
|
||||
// Like CompareRI, but expanded after RA depending on the choice of register.
|
||||
class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls,
|
||||
ImmOpWithPattern imm>
|
||||
Immediate imm>
|
||||
: Pseudo<(outs), (ins cls:$R1, imm:$I2),
|
||||
[(set CC, (operator cls:$R1, imm:$I2))]> {
|
||||
let isCompare = 1;
|
||||
|
@ -4783,7 +4783,7 @@ class CompareRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
|
|||
}
|
||||
|
||||
// Like TestBinarySIL, but expanded later.
|
||||
class TestBinarySILPseudo<SDPatternOperator operator, ImmOpWithPattern imm>
|
||||
class TestBinarySILPseudo<SDPatternOperator operator, Immediate imm>
|
||||
: Pseudo<(outs), (ins bdaddr12only:$BD1, imm:$I2),
|
||||
[(set CC, (operator bdaddr12only:$BD1, imm:$I2))]>;
|
||||
|
||||
|
@ -4812,7 +4812,7 @@ class CondBinaryRRFaPseudo<RegisterOperand cls1, RegisterOperand cls2,
|
|||
|
||||
// Like CondBinaryRIE, but expanded after RA depending on the choice of
|
||||
// register.
|
||||
class CondBinaryRIEPseudo<RegisterOperand cls, ImmOpWithPattern imm>
|
||||
class CondBinaryRIEPseudo<RegisterOperand cls, Immediate imm>
|
||||
: Pseudo<(outs cls:$R1),
|
||||
(ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3),
|
||||
[(set cls:$R1, (z_select_ccmask imm:$I2, cls:$R1src,
|
||||
|
@ -4876,7 +4876,7 @@ class SelectWrapper<ValueType vt, RegisterOperand cls>
|
|||
: Pseudo<(outs cls:$dst),
|
||||
(ins cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc),
|
||||
[(set (vt cls:$dst), (z_select_ccmask cls:$src1, cls:$src2,
|
||||
imm32zx4_timm:$valid, imm32zx4_timm:$cc))]> {
|
||||
imm32zx4:$valid, imm32zx4:$cc))]> {
|
||||
let usesCustomInserter = 1;
|
||||
let hasNoSchedulingInfo = 1;
|
||||
let Uses = [CC];
|
||||
|
@ -4890,12 +4890,12 @@ multiclass CondStores<RegisterOperand cls, SDPatternOperator store,
|
|||
def "" : Pseudo<(outs),
|
||||
(ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc),
|
||||
[(store (z_select_ccmask cls:$new, (load mode:$addr),
|
||||
imm32zx4_timm:$valid, imm32zx4_timm:$cc),
|
||||
imm32zx4:$valid, imm32zx4:$cc),
|
||||
mode:$addr)]>;
|
||||
def Inv : Pseudo<(outs),
|
||||
(ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc),
|
||||
[(store (z_select_ccmask (load mode:$addr), cls:$new,
|
||||
imm32zx4_timm:$valid, imm32zx4_timm:$cc),
|
||||
imm32zx4:$valid, imm32zx4:$cc),
|
||||
mode:$addr)]>;
|
||||
}
|
||||
}
|
||||
|
@ -4917,11 +4917,11 @@ class AtomicLoadBinary<SDPatternOperator operator, RegisterOperand cls,
|
|||
// Specializations of AtomicLoadBinary.
|
||||
class AtomicLoadBinaryReg32<SDPatternOperator operator>
|
||||
: AtomicLoadBinary<operator, GR32, (i32 GR32:$src2), GR32>;
|
||||
class AtomicLoadBinaryImm32<SDPatternOperator operator, ImmOpWithPattern imm>
|
||||
class AtomicLoadBinaryImm32<SDPatternOperator operator, Immediate imm>
|
||||
: AtomicLoadBinary<operator, GR32, (i32 imm:$src2), imm>;
|
||||
class AtomicLoadBinaryReg64<SDPatternOperator operator>
|
||||
: AtomicLoadBinary<operator, GR64, (i64 GR64:$src2), GR64>;
|
||||
class AtomicLoadBinaryImm64<SDPatternOperator operator, ImmOpWithPattern imm>
|
||||
class AtomicLoadBinaryImm64<SDPatternOperator operator, Immediate imm>
|
||||
: AtomicLoadBinary<operator, GR64, (i64 imm:$src2), imm>;
|
||||
|
||||
// OPERATOR is ATOMIC_SWAPW or an ATOMIC_LOADW_* operation. PAT and OPERAND
|
||||
|
@ -4944,7 +4944,7 @@ class AtomicLoadWBinary<SDPatternOperator operator, dag pat,
|
|||
// Specializations of AtomicLoadWBinary.
|
||||
class AtomicLoadWBinaryReg<SDPatternOperator operator>
|
||||
: AtomicLoadWBinary<operator, (i32 GR32:$src2), GR32>;
|
||||
class AtomicLoadWBinaryImm<SDPatternOperator operator, ImmOpWithPattern imm>
|
||||
class AtomicLoadWBinaryImm<SDPatternOperator operator, Immediate imm>
|
||||
: AtomicLoadWBinary<operator, (i32 imm:$src2), imm>;
|
||||
|
||||
// A pseudo instruction that is a direct alias of a real instruction.
|
||||
|
@ -4979,7 +4979,7 @@ class StoreAliasVRX<SDPatternOperator operator, TypedReg tr,
|
|||
|
||||
// An alias of a BinaryRI, but with different register sizes.
|
||||
class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls,
|
||||
ImmOpWithPattern imm>
|
||||
Immediate imm>
|
||||
: Alias<4, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
|
||||
[(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
|
||||
let Constraints = "$R1 = $R1src";
|
||||
|
@ -4987,7 +4987,7 @@ class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls,
|
|||
|
||||
// An alias of a BinaryRIL, but with different register sizes.
|
||||
class BinaryAliasRIL<SDPatternOperator operator, RegisterOperand cls,
|
||||
ImmOpWithPattern imm>
|
||||
Immediate imm>
|
||||
: Alias<6, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
|
||||
[(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
|
||||
let Constraints = "$R1 = $R1src";
|
||||
|
@ -4999,7 +4999,7 @@ class BinaryAliasVRRf<RegisterOperand cls>
|
|||
|
||||
// An alias of a CompareRI, but with different register sizes.
|
||||
class CompareAliasRI<SDPatternOperator operator, RegisterOperand cls,
|
||||
ImmOpWithPattern imm>
|
||||
Immediate imm>
|
||||
: Alias<4, (outs), (ins cls:$R1, imm:$I2),
|
||||
[(set CC, (operator cls:$R1, imm:$I2))]> {
|
||||
let isCompare = 1;
|
||||
|
|
|
@ -60,7 +60,7 @@ let Predicates = [FeatureVector] in {
|
|||
// Generate byte mask.
|
||||
def VZERO : InherentVRIa<"vzero", 0xE744, 0>;
|
||||
def VONE : InherentVRIa<"vone", 0xE744, 0xffff>;
|
||||
def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16_timm>;
|
||||
def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>;
|
||||
|
||||
// Generate mask.
|
||||
def VGM : BinaryVRIbGeneric<"vgm", 0xE746>;
|
||||
|
@ -71,10 +71,10 @@ let Predicates = [FeatureVector] in {
|
|||
|
||||
// Replicate immediate.
|
||||
def VREPI : UnaryVRIaGeneric<"vrepi", 0xE745, imm32sx16>;
|
||||
def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16_timm, 0>;
|
||||
def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16_timm, 1>;
|
||||
def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16_timm, 2>;
|
||||
def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16_timm, 3>;
|
||||
def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>;
|
||||
def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>;
|
||||
def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>;
|
||||
def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>;
|
||||
}
|
||||
|
||||
// Load element immediate.
|
||||
|
@ -116,7 +116,7 @@ let Predicates = [FeatureVector] in {
|
|||
(ins bdxaddr12only:$XBD2, imm32zx4:$M3),
|
||||
"lcbb\t$R1, $XBD2, $M3",
|
||||
[(set GR32:$R1, (int_s390_lcbb bdxaddr12only:$XBD2,
|
||||
imm32zx4_timm:$M3))]>;
|
||||
imm32zx4:$M3))]>;
|
||||
|
||||
// Load with length. The number of loaded bytes is only known at run time.
|
||||
def VLL : BinaryVRSb<"vll", 0xE737, int_s390_vll, 0>;
|
||||
|
@ -362,9 +362,9 @@ let Predicates = [FeatureVector] in {
|
|||
def VREPH : BinaryVRIc<"vreph", 0xE74D, z_splat, v128h, v128h, 1>;
|
||||
def VREPF : BinaryVRIc<"vrepf", 0xE74D, z_splat, v128f, v128f, 2>;
|
||||
def VREPG : BinaryVRIc<"vrepg", 0xE74D, z_splat, v128g, v128g, 3>;
|
||||
def : Pat<(v4f32 (z_splat VR128:$vec, imm32zx16_timm:$index)),
|
||||
def : Pat<(v4f32 (z_splat VR128:$vec, imm32zx16:$index)),
|
||||
(VREPF VR128:$vec, imm32zx16:$index)>;
|
||||
def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16_timm:$index)),
|
||||
def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16:$index)),
|
||||
(VREPG VR128:$vec, imm32zx16:$index)>;
|
||||
|
||||
// Select.
|
||||
|
@ -778,7 +778,7 @@ let Predicates = [FeatureVector] in {
|
|||
|
||||
// Shift left double by byte.
|
||||
def VSLDB : TernaryVRId<"vsldb", 0xE777, z_shl_double, v128b, v128b, 0>;
|
||||
def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8_timm:$z),
|
||||
def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z),
|
||||
(VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>;
|
||||
|
||||
// Shift left double by bit.
|
||||
|
|
|
@ -21,32 +21,15 @@ class ImmediateTLSAsmOperand<string name>
|
|||
let RenderMethod = "addImmTLSOperands";
|
||||
}
|
||||
|
||||
class ImmediateOp<ValueType vt, string asmop> : Operand<vt> {
|
||||
let PrintMethod = "print"##asmop##"Operand";
|
||||
let DecoderMethod = "decode"##asmop##"Operand";
|
||||
let ParserMatchClass = !cast<AsmOperandClass>(asmop);
|
||||
}
|
||||
|
||||
class ImmOpWithPattern<ValueType vt, string asmop, code pred, SDNodeXForm xform,
|
||||
SDNode ImmNode = imm> :
|
||||
ImmediateOp<vt, asmop>, PatLeaf<(vt ImmNode), pred, xform>;
|
||||
|
||||
// class ImmediatePatLeaf<ValueType vt, code pred,
|
||||
// SDNodeXForm xform, SDNode ImmNode>
|
||||
// : PatLeaf<(vt ImmNode), pred, xform>;
|
||||
|
||||
|
||||
// Constructs both a DAG pattern and instruction operand for an immediate
|
||||
// of type VT. PRED returns true if a node is acceptable and XFORM returns
|
||||
// the operand value associated with the node. ASMOP is the name of the
|
||||
// associated asm operand, and also forms the basis of the asm print method.
|
||||
multiclass Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop> {
|
||||
// def "" : ImmediateOp<vt, asmop>,
|
||||
// PatLeaf<(vt imm), pred, xform>;
|
||||
def "" : ImmOpWithPattern<vt, asmop, pred, xform>;
|
||||
|
||||
// def _timm : PatLeaf<(vt timm), pred, xform>;
|
||||
def _timm : ImmOpWithPattern<vt, asmop, pred, xform, timm>;
|
||||
class Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop>
|
||||
: PatLeaf<(vt imm), pred, xform>, Operand<vt> {
|
||||
let PrintMethod = "print"##asmop##"Operand";
|
||||
let DecoderMethod = "decode"##asmop##"Operand";
|
||||
let ParserMatchClass = !cast<AsmOperandClass>(asmop);
|
||||
}
|
||||
|
||||
// Constructs an asm operand for a PC-relative address. SIZE says how
|
||||
|
@ -312,87 +295,87 @@ def U48Imm : ImmediateAsmOperand<"U48Imm">;
|
|||
|
||||
// Immediates for the lower and upper 16 bits of an i32, with the other
|
||||
// bits of the i32 being zero.
|
||||
defm imm32ll16 : Immediate<i32, [{
|
||||
def imm32ll16 : Immediate<i32, [{
|
||||
return SystemZ::isImmLL(N->getZExtValue());
|
||||
}], LL16, "U16Imm">;
|
||||
|
||||
defm imm32lh16 : Immediate<i32, [{
|
||||
def imm32lh16 : Immediate<i32, [{
|
||||
return SystemZ::isImmLH(N->getZExtValue());
|
||||
}], LH16, "U16Imm">;
|
||||
|
||||
// Immediates for the lower and upper 16 bits of an i32, with the other
|
||||
// bits of the i32 being one.
|
||||
defm imm32ll16c : Immediate<i32, [{
|
||||
def imm32ll16c : Immediate<i32, [{
|
||||
return SystemZ::isImmLL(uint32_t(~N->getZExtValue()));
|
||||
}], LL16, "U16Imm">;
|
||||
|
||||
defm imm32lh16c : Immediate<i32, [{
|
||||
def imm32lh16c : Immediate<i32, [{
|
||||
return SystemZ::isImmLH(uint32_t(~N->getZExtValue()));
|
||||
}], LH16, "U16Imm">;
|
||||
|
||||
// Short immediates
|
||||
defm imm32zx1 : Immediate<i32, [{
|
||||
def imm32zx1 : Immediate<i32, [{
|
||||
return isUInt<1>(N->getZExtValue());
|
||||
}], NOOP_SDNodeXForm, "U1Imm">;
|
||||
|
||||
defm imm32zx2 : Immediate<i32, [{
|
||||
def imm32zx2 : Immediate<i32, [{
|
||||
return isUInt<2>(N->getZExtValue());
|
||||
}], NOOP_SDNodeXForm, "U2Imm">;
|
||||
|
||||
defm imm32zx3 : Immediate<i32, [{
|
||||
def imm32zx3 : Immediate<i32, [{
|
||||
return isUInt<3>(N->getZExtValue());
|
||||
}], NOOP_SDNodeXForm, "U3Imm">;
|
||||
|
||||
defm imm32zx4 : Immediate<i32, [{
|
||||
def imm32zx4 : Immediate<i32, [{
|
||||
return isUInt<4>(N->getZExtValue());
|
||||
}], NOOP_SDNodeXForm, "U4Imm">;
|
||||
|
||||
// Note: this enforces an even value during code generation only.
|
||||
// When used from the assembler, any 4-bit value is allowed.
|
||||
defm imm32zx4even : Immediate<i32, [{
|
||||
def imm32zx4even : Immediate<i32, [{
|
||||
return isUInt<4>(N->getZExtValue());
|
||||
}], UIMM8EVEN, "U4Imm">;
|
||||
|
||||
defm imm32zx6 : Immediate<i32, [{
|
||||
def imm32zx6 : Immediate<i32, [{
|
||||
return isUInt<6>(N->getZExtValue());
|
||||
}], NOOP_SDNodeXForm, "U6Imm">;
|
||||
|
||||
defm imm32sx8 : Immediate<i32, [{
|
||||
def imm32sx8 : Immediate<i32, [{
|
||||
return isInt<8>(N->getSExtValue());
|
||||
}], SIMM8, "S8Imm">;
|
||||
|
||||
defm imm32zx8 : Immediate<i32, [{
|
||||
def imm32zx8 : Immediate<i32, [{
|
||||
return isUInt<8>(N->getZExtValue());
|
||||
}], UIMM8, "U8Imm">;
|
||||
|
||||
defm imm32zx8trunc : Immediate<i32, [{}], UIMM8, "U8Imm">;
|
||||
def imm32zx8trunc : Immediate<i32, [{}], UIMM8, "U8Imm">;
|
||||
|
||||
defm imm32zx12 : Immediate<i32, [{
|
||||
def imm32zx12 : Immediate<i32, [{
|
||||
return isUInt<12>(N->getZExtValue());
|
||||
}], UIMM12, "U12Imm">;
|
||||
|
||||
defm imm32sx16 : Immediate<i32, [{
|
||||
def imm32sx16 : Immediate<i32, [{
|
||||
return isInt<16>(N->getSExtValue());
|
||||
}], SIMM16, "S16Imm">;
|
||||
|
||||
defm imm32sx16n : Immediate<i32, [{
|
||||
def imm32sx16n : Immediate<i32, [{
|
||||
return isInt<16>(-N->getSExtValue());
|
||||
}], NEGSIMM16, "S16Imm">;
|
||||
|
||||
defm imm32zx16 : Immediate<i32, [{
|
||||
def imm32zx16 : Immediate<i32, [{
|
||||
return isUInt<16>(N->getZExtValue());
|
||||
}], UIMM16, "U16Imm">;
|
||||
|
||||
defm imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">;
|
||||
defm imm32zx16trunc : Immediate<i32, [{}], UIMM16, "U16Imm">;
|
||||
def imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">;
|
||||
def imm32zx16trunc : Immediate<i32, [{}], UIMM16, "U16Imm">;
|
||||
|
||||
// Full 32-bit immediates. We need both signed and unsigned versions
|
||||
// because the assembler is picky. E.g. AFI requires signed operands
|
||||
// while NILF requires unsigned ones.
|
||||
defm simm32 : Immediate<i32, [{}], SIMM32, "S32Imm">;
|
||||
defm uimm32 : Immediate<i32, [{}], UIMM32, "U32Imm">;
|
||||
def simm32 : Immediate<i32, [{}], SIMM32, "S32Imm">;
|
||||
def uimm32 : Immediate<i32, [{}], UIMM32, "U32Imm">;
|
||||
|
||||
defm simm32n : Immediate<i32, [{
|
||||
def simm32n : Immediate<i32, [{
|
||||
return isInt<32>(-N->getSExtValue());
|
||||
}], NEGSIMM32, "S32Imm">;
|
||||
|
||||
|
@ -404,107 +387,107 @@ def imm32 : ImmLeaf<i32, [{}]>;
|
|||
|
||||
// Immediates for 16-bit chunks of an i64, with the other bits of the
|
||||
// i64 being zero.
|
||||
defm imm64ll16 : Immediate<i64, [{
|
||||
def imm64ll16 : Immediate<i64, [{
|
||||
return SystemZ::isImmLL(N->getZExtValue());
|
||||
}], LL16, "U16Imm">;
|
||||
|
||||
defm imm64lh16 : Immediate<i64, [{
|
||||
def imm64lh16 : Immediate<i64, [{
|
||||
return SystemZ::isImmLH(N->getZExtValue());
|
||||
}], LH16, "U16Imm">;
|
||||
|
||||
defm imm64hl16 : Immediate<i64, [{
|
||||
def imm64hl16 : Immediate<i64, [{
|
||||
return SystemZ::isImmHL(N->getZExtValue());
|
||||
}], HL16, "U16Imm">;
|
||||
|
||||
defm imm64hh16 : Immediate<i64, [{
|
||||
def imm64hh16 : Immediate<i64, [{
|
||||
return SystemZ::isImmHH(N->getZExtValue());
|
||||
}], HH16, "U16Imm">;
|
||||
|
||||
// Immediates for 16-bit chunks of an i64, with the other bits of the
|
||||
// i64 being one.
|
||||
defm imm64ll16c : Immediate<i64, [{
|
||||
def imm64ll16c : Immediate<i64, [{
|
||||
return SystemZ::isImmLL(uint64_t(~N->getZExtValue()));
|
||||
}], LL16, "U16Imm">;
|
||||
|
||||
defm imm64lh16c : Immediate<i64, [{
|
||||
def imm64lh16c : Immediate<i64, [{
|
||||
return SystemZ::isImmLH(uint64_t(~N->getZExtValue()));
|
||||
}], LH16, "U16Imm">;
|
||||
|
||||
defm imm64hl16c : Immediate<i64, [{
|
||||
def imm64hl16c : Immediate<i64, [{
|
||||
return SystemZ::isImmHL(uint64_t(~N->getZExtValue()));
|
||||
}], HL16, "U16Imm">;
|
||||
|
||||
defm imm64hh16c : Immediate<i64, [{
|
||||
def imm64hh16c : Immediate<i64, [{
|
||||
return SystemZ::isImmHH(uint64_t(~N->getZExtValue()));
|
||||
}], HH16, "U16Imm">;
|
||||
|
||||
// Immediates for the lower and upper 32 bits of an i64, with the other
|
||||
// bits of the i64 being zero.
|
||||
defm imm64lf32 : Immediate<i64, [{
|
||||
def imm64lf32 : Immediate<i64, [{
|
||||
return SystemZ::isImmLF(N->getZExtValue());
|
||||
}], LF32, "U32Imm">;
|
||||
|
||||
defm imm64hf32 : Immediate<i64, [{
|
||||
def imm64hf32 : Immediate<i64, [{
|
||||
return SystemZ::isImmHF(N->getZExtValue());
|
||||
}], HF32, "U32Imm">;
|
||||
|
||||
// Immediates for the lower and upper 32 bits of an i64, with the other
|
||||
// bits of the i64 being one.
|
||||
defm imm64lf32c : Immediate<i64, [{
|
||||
def imm64lf32c : Immediate<i64, [{
|
||||
return SystemZ::isImmLF(uint64_t(~N->getZExtValue()));
|
||||
}], LF32, "U32Imm">;
|
||||
|
||||
defm imm64hf32c : Immediate<i64, [{
|
||||
def imm64hf32c : Immediate<i64, [{
|
||||
return SystemZ::isImmHF(uint64_t(~N->getZExtValue()));
|
||||
}], HF32, "U32Imm">;
|
||||
|
||||
// Negated immediates that fit LF32 or LH16.
|
||||
defm imm64lh16n : Immediate<i64, [{
|
||||
def imm64lh16n : Immediate<i64, [{
|
||||
return SystemZ::isImmLH(uint64_t(-N->getZExtValue()));
|
||||
}], NEGLH16, "U16Imm">;
|
||||
|
||||
defm imm64lf32n : Immediate<i64, [{
|
||||
def imm64lf32n : Immediate<i64, [{
|
||||
return SystemZ::isImmLF(uint64_t(-N->getZExtValue()));
|
||||
}], NEGLF32, "U32Imm">;
|
||||
|
||||
// Short immediates.
|
||||
defm imm64sx8 : Immediate<i64, [{
|
||||
def imm64sx8 : Immediate<i64, [{
|
||||
return isInt<8>(N->getSExtValue());
|
||||
}], SIMM8, "S8Imm">;
|
||||
|
||||
defm imm64zx8 : Immediate<i64, [{
|
||||
def imm64zx8 : Immediate<i64, [{
|
||||
return isUInt<8>(N->getSExtValue());
|
||||
}], UIMM8, "U8Imm">;
|
||||
|
||||
defm imm64sx16 : Immediate<i64, [{
|
||||
def imm64sx16 : Immediate<i64, [{
|
||||
return isInt<16>(N->getSExtValue());
|
||||
}], SIMM16, "S16Imm">;
|
||||
|
||||
defm imm64sx16n : Immediate<i64, [{
|
||||
def imm64sx16n : Immediate<i64, [{
|
||||
return isInt<16>(-N->getSExtValue());
|
||||
}], NEGSIMM16, "S16Imm">;
|
||||
|
||||
defm imm64zx16 : Immediate<i64, [{
|
||||
def imm64zx16 : Immediate<i64, [{
|
||||
return isUInt<16>(N->getZExtValue());
|
||||
}], UIMM16, "U16Imm">;
|
||||
|
||||
defm imm64sx32 : Immediate<i64, [{
|
||||
def imm64sx32 : Immediate<i64, [{
|
||||
return isInt<32>(N->getSExtValue());
|
||||
}], SIMM32, "S32Imm">;
|
||||
|
||||
defm imm64sx32n : Immediate<i64, [{
|
||||
def imm64sx32n : Immediate<i64, [{
|
||||
return isInt<32>(-N->getSExtValue());
|
||||
}], NEGSIMM32, "S32Imm">;
|
||||
|
||||
defm imm64zx32 : Immediate<i64, [{
|
||||
def imm64zx32 : Immediate<i64, [{
|
||||
return isUInt<32>(N->getZExtValue());
|
||||
}], UIMM32, "U32Imm">;
|
||||
|
||||
defm imm64zx32n : Immediate<i64, [{
|
||||
def imm64zx32n : Immediate<i64, [{
|
||||
return isUInt<32>(-N->getSExtValue());
|
||||
}], NEGUIMM32, "U32Imm">;
|
||||
|
||||
defm imm64zx48 : Immediate<i64, [{
|
||||
def imm64zx48 : Immediate<i64, [{
|
||||
return isUInt<64>(N->getZExtValue());
|
||||
}], UIMM48, "U48Imm">;
|
||||
|
||||
|
@ -654,7 +637,7 @@ def bdvaddr12only : BDVMode< "64", "12">;
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// A 4-bit condition-code mask.
|
||||
def cond4 : PatLeaf<(i32 timm), [{ return (N->getZExtValue() < 16); }]>,
|
||||
def cond4 : PatLeaf<(i32 imm), [{ return (N->getZExtValue() < 16); }]>,
|
||||
Operand<i32> {
|
||||
let PrintMethod = "printCond4Operand";
|
||||
}
|
||||
|
|
|
@ -472,17 +472,17 @@ def z_subcarry : PatFrag<(ops node:$lhs, node:$rhs),
|
|||
(z_subcarry_1 node:$lhs, node:$rhs, CC)>;
|
||||
|
||||
// Signed and unsigned comparisons.
|
||||
def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, timm), [{
|
||||
def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
|
||||
unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
|
||||
return Type != SystemZICMP::UnsignedOnly;
|
||||
}]>;
|
||||
def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, timm), [{
|
||||
def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
|
||||
unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
|
||||
return Type != SystemZICMP::SignedOnly;
|
||||
}]>;
|
||||
|
||||
// Register- and memory-based TEST UNDER MASK.
|
||||
def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, timm)>;
|
||||
def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, imm)>;
|
||||
def z_tm_mem : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, 0)>;
|
||||
|
||||
// Register sign-extend operations. Sub-32-bit values are represented as i32s.
|
||||
|
|
|
@ -100,12 +100,12 @@ multiclass CondStores64<Instruction insn, Instruction insninv,
|
|||
SDPatternOperator store, SDPatternOperator load,
|
||||
AddressingMode mode> {
|
||||
def : Pat<(store (z_select_ccmask GR64:$new, (load mode:$addr),
|
||||
imm32zx4_timm:$valid, imm32zx4_timm:$cc),
|
||||
imm32zx4:$valid, imm32zx4:$cc),
|
||||
mode:$addr),
|
||||
(insn (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr,
|
||||
imm32zx4:$valid, imm32zx4:$cc)>;
|
||||
def : Pat<(store (z_select_ccmask (load mode:$addr), GR64:$new,
|
||||
imm32zx4_timm:$valid, imm32zx4_timm:$cc),
|
||||
imm32zx4:$valid, imm32zx4:$cc),
|
||||
mode:$addr),
|
||||
(insninv (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr,
|
||||
imm32zx4:$valid, imm32zx4:$cc)>;
|
||||
|
|
|
@ -209,10 +209,10 @@ std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemchr(
|
|||
|
||||
// Now select between End and null, depending on whether the character
|
||||
// was found.
|
||||
SDValue Ops[] = {
|
||||
End, DAG.getConstant(0, DL, PtrVT),
|
||||
DAG.getTargetConstant(SystemZ::CCMASK_SRST, DL, MVT::i32),
|
||||
DAG.getTargetConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32), CCReg};
|
||||
SDValue Ops[] = {End, DAG.getConstant(0, DL, PtrVT),
|
||||
DAG.getConstant(SystemZ::CCMASK_SRST, DL, MVT::i32),
|
||||
DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32),
|
||||
CCReg};
|
||||
End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, PtrVT, Ops);
|
||||
return std::make_pair(End, Chain);
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ defm MEMORY_INIT :
|
|||
(ins i32imm_op:$seg, i32imm_op:$idx, I32:$dest,
|
||||
I32:$offset, I32:$size),
|
||||
(outs), (ins i32imm_op:$seg, i32imm_op:$idx),
|
||||
[(int_wasm_memory_init (i32 timm:$seg), (i32 timm:$idx), I32:$dest,
|
||||
[(int_wasm_memory_init (i32 imm:$seg), (i32 imm:$idx), I32:$dest,
|
||||
I32:$offset, I32:$size
|
||||
)],
|
||||
"memory.init\t$seg, $idx, $dest, $offset, $size",
|
||||
|
@ -48,7 +48,7 @@ defm MEMORY_INIT :
|
|||
let hasSideEffects = 1 in
|
||||
defm DATA_DROP :
|
||||
BULK_I<(outs), (ins i32imm_op:$seg), (outs), (ins i32imm_op:$seg),
|
||||
[(int_wasm_data_drop (i32 timm:$seg))],
|
||||
[(int_wasm_data_drop (i32 imm:$seg))],
|
||||
"data.drop\t$seg", "data.drop\t$seg", 0x09>;
|
||||
|
||||
let mayLoad = 1, mayStore = 1 in
|
||||
|
|
|
@ -879,9 +879,10 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
|
|||
case ISD::FRINT: Imm = 0x4; break;
|
||||
}
|
||||
SDLoc dl(N);
|
||||
SDValue Res = CurDAG->getNode(
|
||||
X86ISD::VRNDSCALE, dl, N->getValueType(0), N->getOperand(0),
|
||||
CurDAG->getTargetConstant(Imm, dl, MVT::i8));
|
||||
SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
|
||||
N->getValueType(0),
|
||||
N->getOperand(0),
|
||||
CurDAG->getConstant(Imm, dl, MVT::i8));
|
||||
--I;
|
||||
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
|
||||
++I;
|
||||
|
@ -5095,9 +5096,10 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
|
|||
case ISD::FRINT: Imm = 0x4; break;
|
||||
}
|
||||
SDLoc dl(Node);
|
||||
SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, Node->getValueType(0),
|
||||
SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
|
||||
Node->getValueType(0),
|
||||
Node->getOperand(0),
|
||||
CurDAG->getTargetConstant(Imm, dl, MVT::i8));
|
||||
CurDAG->getConstant(Imm, dl, MVT::i8));
|
||||
ReplaceNode(Node, Res.getNode());
|
||||
SelectCode(Res.getNode());
|
||||
return;
|
||||
|
|
|
@ -211,7 +211,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
// Integer absolute.
|
||||
if (Subtarget.hasCMov()) {
|
||||
setOperationAction(ISD::ABS , MVT::i16 , Custom);
|
||||
setOperationAction(ISD::ABS , MVT::i32 , Custom);
|
||||
setOperationAction(ISD::ABS , MVT::i32 , Custom);
|
||||
}
|
||||
setOperationAction(ISD::ABS , MVT::i64 , Custom);
|
||||
|
||||
|
@ -4981,7 +4981,7 @@ bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
|
|||
|
||||
// Find the type this will be legalized to. Otherwise we might prematurely
|
||||
// convert this to shl+add/sub and then still have to type legalize those ops.
|
||||
// Another choice would be to defer the decision for illegal types until
|
||||
// Another choice would be to defer the decision for illegal types until
|
||||
// after type legalization. But constant splat vectors of i64 can't make it
|
||||
// through type legalization on 32-bit targets so we would need to special
|
||||
// case vXi64.
|
||||
|
@ -5759,7 +5759,7 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
|
|||
|
||||
if (IdxVal == 0) {
|
||||
// Zero lower bits of the Vec
|
||||
SDValue ShiftBits = DAG.getTargetConstant(SubVecNumElems, dl, MVT::i8);
|
||||
SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
|
||||
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec,
|
||||
ZeroIdx);
|
||||
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
|
||||
|
@ -5778,7 +5778,7 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
|
|||
if (Vec.isUndef()) {
|
||||
assert(IdxVal != 0 && "Unexpected index");
|
||||
SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
|
||||
DAG.getTargetConstant(IdxVal, dl, MVT::i8));
|
||||
DAG.getConstant(IdxVal, dl, MVT::i8));
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
|
||||
}
|
||||
|
||||
|
@ -5788,17 +5788,17 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
|
|||
unsigned ShiftLeft = NumElems - SubVecNumElems;
|
||||
unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
|
||||
SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
|
||||
DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
|
||||
DAG.getConstant(ShiftLeft, dl, MVT::i8));
|
||||
if (ShiftRight != 0)
|
||||
SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
|
||||
DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
|
||||
DAG.getConstant(ShiftRight, dl, MVT::i8));
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
|
||||
}
|
||||
|
||||
// Simple case when we put subvector in the upper part
|
||||
if (IdxVal + SubVecNumElems == NumElems) {
|
||||
SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
|
||||
DAG.getTargetConstant(IdxVal, dl, MVT::i8));
|
||||
DAG.getConstant(IdxVal, dl, MVT::i8));
|
||||
if (SubVecNumElems * 2 == NumElems) {
|
||||
// Special case, use legal zero extending insert_subvector. This allows
|
||||
// isel to optimize when bits are known zero.
|
||||
|
@ -5811,7 +5811,7 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
|
|||
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
|
||||
Undef, Vec, ZeroIdx);
|
||||
NumElems = WideOpVT.getVectorNumElements();
|
||||
SDValue ShiftBits = DAG.getTargetConstant(NumElems - IdxVal, dl, MVT::i8);
|
||||
SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8);
|
||||
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
|
||||
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
|
||||
}
|
||||
|
@ -5827,17 +5827,17 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
|
|||
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
|
||||
// Move the current value of the bit to be replaced to the lsbs.
|
||||
Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
|
||||
DAG.getTargetConstant(IdxVal, dl, MVT::i8));
|
||||
DAG.getConstant(IdxVal, dl, MVT::i8));
|
||||
// Xor with the new bit.
|
||||
Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Op, SubVec);
|
||||
// Shift to MSB, filling bottom bits with 0.
|
||||
unsigned ShiftLeft = NumElems - SubVecNumElems;
|
||||
Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Op,
|
||||
DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
|
||||
DAG.getConstant(ShiftLeft, dl, MVT::i8));
|
||||
// Shift to the final position, filling upper bits with 0.
|
||||
unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
|
||||
Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op,
|
||||
DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
|
||||
DAG.getConstant(ShiftRight, dl, MVT::i8));
|
||||
// Xor with original vector leaving the new value.
|
||||
Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Vec, Op);
|
||||
// Reduce to original width if needed.
|
||||
|
@ -7637,7 +7637,7 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
|
|||
assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
|
||||
SDLoc DL(Op);
|
||||
SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
|
||||
DAG.getIntPtrConstant(InsertPSMask, DL, true));
|
||||
DAG.getIntPtrConstant(InsertPSMask, DL));
|
||||
return DAG.getBitcast(VT, Result);
|
||||
}
|
||||
|
||||
|
@ -7650,7 +7650,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits,
|
|||
unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
|
||||
SrcOp = DAG.getBitcast(ShVT, SrcOp);
|
||||
assert(NumBits % 8 == 0 && "Only support byte sized shifts");
|
||||
SDValue ShiftVal = DAG.getTargetConstant(NumBits / 8, dl, MVT::i8);
|
||||
SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, MVT::i8);
|
||||
return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
|
||||
}
|
||||
|
||||
|
@ -9439,9 +9439,9 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
|
|||
SDValue HiHi = DAG.getVectorShuffle(MVT::v8f32, DL, SrcVec, SrcVec,
|
||||
{4, 5, 6, 7, 4, 5, 6, 7});
|
||||
if (Subtarget.hasXOP())
|
||||
return DAG.getBitcast(
|
||||
VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32, LoLo, HiHi,
|
||||
IndicesVec, DAG.getTargetConstant(0, DL, MVT::i8)));
|
||||
return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32,
|
||||
LoLo, HiHi, IndicesVec,
|
||||
DAG.getConstant(0, DL, MVT::i8)));
|
||||
// Permute Lo and Hi and then select based on index range.
|
||||
// This works as VPERMILPS only uses index bits[0:1] to permute elements.
|
||||
SDValue Res = DAG.getSelectCC(
|
||||
|
@ -9475,9 +9475,9 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
|
|||
// VPERMIL2PD selects with bit#1 of the index vector, so scale IndicesVec.
|
||||
IndicesVec = DAG.getNode(ISD::ADD, DL, IndicesVT, IndicesVec, IndicesVec);
|
||||
if (Subtarget.hasXOP())
|
||||
return DAG.getBitcast(
|
||||
VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64, LoLo, HiHi,
|
||||
IndicesVec, DAG.getTargetConstant(0, DL, MVT::i8)));
|
||||
return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64,
|
||||
LoLo, HiHi, IndicesVec,
|
||||
DAG.getConstant(0, DL, MVT::i8)));
|
||||
// Permute Lo and Hi and then select based on index range.
|
||||
// This works as VPERMILPD only uses index bit[1] to permute elements.
|
||||
SDValue Res = DAG.getSelectCC(
|
||||
|
@ -10048,7 +10048,7 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
|
|||
DAG.getUNDEF(ShiftVT), SubVec,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
Op = DAG.getNode(X86ISD::KSHIFTL, dl, ShiftVT, SubVec,
|
||||
DAG.getTargetConstant(Idx * SubVecNumElts, dl, MVT::i8));
|
||||
DAG.getConstant(Idx * SubVecNumElts, dl, MVT::i8));
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, Op,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
}
|
||||
|
@ -10441,7 +10441,7 @@ static unsigned getV4X86ShuffleImm(ArrayRef<int> Mask) {
|
|||
|
||||
static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL,
|
||||
SelectionDAG &DAG) {
|
||||
return DAG.getTargetConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8);
|
||||
return DAG.getConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8);
|
||||
}
|
||||
|
||||
/// Compute whether each element of a shuffle is zeroable.
|
||||
|
@ -11086,7 +11086,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
|
|||
case MVT::v8i16:
|
||||
assert(Subtarget.hasSSE41() && "128-bit blends require SSE41!");
|
||||
return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
|
||||
DAG.getTargetConstant(BlendMask, DL, MVT::i8));
|
||||
DAG.getConstant(BlendMask, DL, MVT::i8));
|
||||
case MVT::v16i16: {
|
||||
assert(Subtarget.hasAVX2() && "v16i16 blends require AVX2!");
|
||||
SmallVector<int, 8> RepeatedMask;
|
||||
|
@ -11098,7 +11098,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
|
|||
if (RepeatedMask[i] >= 8)
|
||||
BlendMask |= 1ull << i;
|
||||
return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
|
||||
DAG.getTargetConstant(BlendMask, DL, MVT::i8));
|
||||
DAG.getConstant(BlendMask, DL, MVT::i8));
|
||||
}
|
||||
// Use PBLENDW for lower/upper lanes and then blend lanes.
|
||||
// TODO - we should allow 2 PBLENDW here and leave shuffle combine to
|
||||
|
@ -11107,9 +11107,9 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
|
|||
uint64_t HiMask = (BlendMask >> 8) & 0xFF;
|
||||
if (LoMask == 0 || LoMask == 255 || HiMask == 0 || HiMask == 255) {
|
||||
SDValue Lo = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
|
||||
DAG.getTargetConstant(LoMask, DL, MVT::i8));
|
||||
DAG.getConstant(LoMask, DL, MVT::i8));
|
||||
SDValue Hi = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
|
||||
DAG.getTargetConstant(HiMask, DL, MVT::i8));
|
||||
DAG.getConstant(HiMask, DL, MVT::i8));
|
||||
return DAG.getVectorShuffle(
|
||||
MVT::v16i16, DL, Lo, Hi,
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31});
|
||||
|
@ -11369,7 +11369,7 @@ static SDValue lowerShuffleAsByteRotateAndPermute(
|
|||
SDValue Rotate = DAG.getBitcast(
|
||||
VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, DAG.getBitcast(ByteVT, Hi),
|
||||
DAG.getBitcast(ByteVT, Lo),
|
||||
DAG.getTargetConstant(Scale * RotAmt, DL, MVT::i8)));
|
||||
DAG.getConstant(Scale * RotAmt, DL, MVT::i8)));
|
||||
SmallVector<int, 64> PermMask(NumElts, SM_SentinelUndef);
|
||||
for (int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) {
|
||||
for (int Elt = 0; Elt != NumEltsPerLane; ++Elt) {
|
||||
|
@ -11576,7 +11576,7 @@ static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1,
|
|||
"512-bit PALIGNR requires BWI instructions");
|
||||
return DAG.getBitcast(
|
||||
VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, Lo, Hi,
|
||||
DAG.getTargetConstant(ByteRotation, DL, MVT::i8)));
|
||||
DAG.getConstant(ByteRotation, DL, MVT::i8)));
|
||||
}
|
||||
|
||||
assert(VT.is128BitVector() &&
|
||||
|
@ -11590,12 +11590,10 @@ static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1,
|
|||
int LoByteShift = 16 - ByteRotation;
|
||||
int HiByteShift = ByteRotation;
|
||||
|
||||
SDValue LoShift =
|
||||
DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Lo,
|
||||
DAG.getTargetConstant(LoByteShift, DL, MVT::i8));
|
||||
SDValue HiShift =
|
||||
DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Hi,
|
||||
DAG.getTargetConstant(HiByteShift, DL, MVT::i8));
|
||||
SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Lo,
|
||||
DAG.getConstant(LoByteShift, DL, MVT::i8));
|
||||
SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Hi,
|
||||
DAG.getConstant(HiByteShift, DL, MVT::i8));
|
||||
return DAG.getBitcast(VT,
|
||||
DAG.getNode(ISD::OR, DL, MVT::v16i8, LoShift, HiShift));
|
||||
}
|
||||
|
@ -11627,7 +11625,7 @@ static SDValue lowerShuffleAsRotate(const SDLoc &DL, MVT VT, SDValue V1,
|
|||
return SDValue();
|
||||
|
||||
return DAG.getNode(X86ISD::VALIGN, DL, VT, Lo, Hi,
|
||||
DAG.getTargetConstant(Rotation, DL, MVT::i8));
|
||||
DAG.getConstant(Rotation, DL, MVT::i8));
|
||||
}
|
||||
|
||||
/// Try to lower a vector shuffle as a byte shift sequence.
|
||||
|
@ -11666,27 +11664,27 @@ static SDValue lowerVectorShuffleAsByteShiftMask(
|
|||
if (ZeroLo == 0) {
|
||||
unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
|
||||
Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
|
||||
DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
|
||||
DAG.getConstant(Scale * Shift, DL, MVT::i8));
|
||||
Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
|
||||
DAG.getTargetConstant(Scale * ZeroHi, DL, MVT::i8));
|
||||
DAG.getConstant(Scale * ZeroHi, DL, MVT::i8));
|
||||
} else if (ZeroHi == 0) {
|
||||
unsigned Shift = Mask[ZeroLo] % NumElts;
|
||||
Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
|
||||
DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
|
||||
DAG.getConstant(Scale * Shift, DL, MVT::i8));
|
||||
Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
|
||||
DAG.getTargetConstant(Scale * ZeroLo, DL, MVT::i8));
|
||||
DAG.getConstant(Scale * ZeroLo, DL, MVT::i8));
|
||||
} else if (!Subtarget.hasSSSE3()) {
|
||||
// If we don't have PSHUFB then it's worth avoiding an AND constant mask
|
||||
// by performing 3 byte shifts. Shuffle combining can kick in above that.
|
||||
// TODO: There may be some cases where VSH{LR}DQ+PAND is still better.
|
||||
unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
|
||||
Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
|
||||
DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
|
||||
DAG.getConstant(Scale * Shift, DL, MVT::i8));
|
||||
Shift += Mask[ZeroLo] % NumElts;
|
||||
Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
|
||||
DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
|
||||
DAG.getConstant(Scale * Shift, DL, MVT::i8));
|
||||
Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
|
||||
DAG.getTargetConstant(Scale * ZeroLo, DL, MVT::i8));
|
||||
DAG.getConstant(Scale * ZeroLo, DL, MVT::i8));
|
||||
} else
|
||||
return SDValue();
|
||||
|
||||
|
@ -11808,7 +11806,7 @@ static SDValue lowerShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
|
|||
"Illegal integer vector type");
|
||||
V = DAG.getBitcast(ShiftVT, V);
|
||||
V = DAG.getNode(Opcode, DL, ShiftVT, V,
|
||||
DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
|
||||
DAG.getConstant(ShiftAmt, DL, MVT::i8));
|
||||
return DAG.getBitcast(VT, V);
|
||||
}
|
||||
|
||||
|
@ -11942,14 +11940,14 @@ static SDValue lowerShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
|
|||
uint64_t BitLen, BitIdx;
|
||||
if (matchShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable))
|
||||
return DAG.getNode(X86ISD::EXTRQI, DL, VT, V1,
|
||||
DAG.getTargetConstant(BitLen, DL, MVT::i8),
|
||||
DAG.getTargetConstant(BitIdx, DL, MVT::i8));
|
||||
DAG.getConstant(BitLen, DL, MVT::i8),
|
||||
DAG.getConstant(BitIdx, DL, MVT::i8));
|
||||
|
||||
if (matchShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx))
|
||||
return DAG.getNode(X86ISD::INSERTQI, DL, VT, V1 ? V1 : DAG.getUNDEF(VT),
|
||||
V2 ? V2 : DAG.getUNDEF(VT),
|
||||
DAG.getTargetConstant(BitLen, DL, MVT::i8),
|
||||
DAG.getTargetConstant(BitIdx, DL, MVT::i8));
|
||||
DAG.getConstant(BitLen, DL, MVT::i8),
|
||||
DAG.getConstant(BitIdx, DL, MVT::i8));
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
@ -12046,8 +12044,8 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
|
|||
int LoIdx = Offset * EltBits;
|
||||
SDValue Lo = DAG.getBitcast(
|
||||
MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
|
||||
DAG.getTargetConstant(EltBits, DL, MVT::i8),
|
||||
DAG.getTargetConstant(LoIdx, DL, MVT::i8)));
|
||||
DAG.getConstant(EltBits, DL, MVT::i8),
|
||||
DAG.getConstant(LoIdx, DL, MVT::i8)));
|
||||
|
||||
if (isUndefUpperHalf(Mask) || !SafeOffset(Offset + 1))
|
||||
return DAG.getBitcast(VT, Lo);
|
||||
|
@ -12055,8 +12053,8 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
|
|||
int HiIdx = (Offset + 1) * EltBits;
|
||||
SDValue Hi = DAG.getBitcast(
|
||||
MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
|
||||
DAG.getTargetConstant(EltBits, DL, MVT::i8),
|
||||
DAG.getTargetConstant(HiIdx, DL, MVT::i8)));
|
||||
DAG.getConstant(EltBits, DL, MVT::i8),
|
||||
DAG.getConstant(HiIdx, DL, MVT::i8)));
|
||||
return DAG.getBitcast(VT,
|
||||
DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi));
|
||||
}
|
||||
|
@ -12366,9 +12364,9 @@ static SDValue lowerShuffleAsElementInsertion(
|
|||
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle);
|
||||
} else {
|
||||
V2 = DAG.getBitcast(MVT::v16i8, V2);
|
||||
V2 = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, V2,
|
||||
DAG.getTargetConstant(
|
||||
V2Index * EltVT.getSizeInBits() / 8, DL, MVT::i8));
|
||||
V2 = DAG.getNode(
|
||||
X86ISD::VSHLDQ, DL, MVT::v16i8, V2,
|
||||
DAG.getConstant(V2Index * EltVT.getSizeInBits() / 8, DL, MVT::i8));
|
||||
V2 = DAG.getBitcast(VT, V2);
|
||||
}
|
||||
}
|
||||
|
@ -12800,7 +12798,7 @@ static SDValue lowerShuffleAsInsertPS(const SDLoc &DL, SDValue V1, SDValue V2,
|
|||
|
||||
// Insert the V2 element into the desired position.
|
||||
return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
|
||||
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
|
||||
DAG.getConstant(InsertPSMask, DL, MVT::i8));
|
||||
}
|
||||
|
||||
/// Try to lower a shuffle as a permute of the inputs followed by an
|
||||
|
@ -12949,14 +12947,14 @@ static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
// If we have AVX, we can use VPERMILPS which will allow folding a load
|
||||
// into the shuffle.
|
||||
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v2f64, V1,
|
||||
DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8));
|
||||
DAG.getConstant(SHUFPDMask, DL, MVT::i8));
|
||||
}
|
||||
|
||||
return DAG.getNode(
|
||||
X86ISD::SHUFP, DL, MVT::v2f64,
|
||||
Mask[0] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1,
|
||||
Mask[1] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1,
|
||||
DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8));
|
||||
DAG.getConstant(SHUFPDMask, DL, MVT::i8));
|
||||
}
|
||||
assert(Mask[0] >= 0 && "No undef lanes in multi-input v2 shuffles!");
|
||||
assert(Mask[1] >= 0 && "No undef lanes in multi-input v2 shuffles!");
|
||||
|
@ -13002,7 +13000,7 @@ static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
|
||||
unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
|
||||
return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V2,
|
||||
DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8));
|
||||
DAG.getConstant(SHUFPDMask, DL, MVT::i8));
|
||||
}
|
||||
|
||||
/// Handle lowering of 2-lane 64-bit integer shuffles.
|
||||
|
@ -14882,8 +14880,8 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
|
|||
if (WidenedMask[0] < 2 && WidenedMask[1] >= 2) {
|
||||
unsigned PermMask = ((WidenedMask[0] % 2) << 0) |
|
||||
((WidenedMask[1] % 2) << 1);
|
||||
return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
|
||||
DAG.getTargetConstant(PermMask, DL, MVT::i8));
|
||||
return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
|
||||
DAG.getConstant(PermMask, DL, MVT::i8));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -14915,7 +14913,7 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
|
|||
V2 = DAG.getUNDEF(VT);
|
||||
|
||||
return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2,
|
||||
DAG.getTargetConstant(PermMask, DL, MVT::i8));
|
||||
DAG.getConstant(PermMask, DL, MVT::i8));
|
||||
}
|
||||
|
||||
/// Lower a vector shuffle by first fixing the 128-bit lanes and then
|
||||
|
@ -15544,7 +15542,7 @@ static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT, SDValue V1,
|
|||
V2 = getZeroVector(VT, Subtarget, DAG, DL);
|
||||
|
||||
return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
|
||||
DAG.getTargetConstant(Immediate, DL, MVT::i8));
|
||||
DAG.getConstant(Immediate, DL, MVT::i8));
|
||||
}
|
||||
|
||||
/// Handle lowering of 4-lane 64-bit floating point shuffles.
|
||||
|
@ -15579,7 +15577,7 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) |
|
||||
((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3);
|
||||
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f64, V1,
|
||||
DAG.getTargetConstant(VPERMILPMask, DL, MVT::i8));
|
||||
DAG.getConstant(VPERMILPMask, DL, MVT::i8));
|
||||
}
|
||||
|
||||
// With AVX2 we have direct support for this permutation.
|
||||
|
@ -16318,7 +16316,7 @@ static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
|
|||
}
|
||||
|
||||
return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
|
||||
DAG.getTargetConstant(PermMask, DL, MVT::i8));
|
||||
DAG.getConstant(PermMask, DL, MVT::i8));
|
||||
}
|
||||
|
||||
/// Handle lowering of 8-lane 64-bit floating point shuffles.
|
||||
|
@ -16343,7 +16341,7 @@ static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
((Mask[4] == 5) << 4) | ((Mask[5] == 5) << 5) |
|
||||
((Mask[6] == 7) << 6) | ((Mask[7] == 7) << 7);
|
||||
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f64, V1,
|
||||
DAG.getTargetConstant(VPERMILPMask, DL, MVT::i8));
|
||||
DAG.getConstant(VPERMILPMask, DL, MVT::i8));
|
||||
}
|
||||
|
||||
SmallVector<int, 4> RepeatedMask;
|
||||
|
@ -16772,7 +16770,7 @@ static SDValue lower1BitShuffleAsKSHIFTR(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
DAG.getUNDEF(WideVT), V1,
|
||||
DAG.getIntPtrConstant(0, DL));
|
||||
Res = DAG.getNode(X86ISD::KSHIFTR, DL, WideVT, Res,
|
||||
DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
|
||||
DAG.getConstant(ShiftAmt, DL, MVT::i8));
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
|
||||
DAG.getIntPtrConstant(0, DL));
|
||||
}
|
||||
|
@ -16879,13 +16877,13 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
int WideElts = WideVT.getVectorNumElements();
|
||||
// Shift left to put the original vector in the MSBs of the new size.
|
||||
Res = DAG.getNode(X86ISD::KSHIFTL, DL, WideVT, Res,
|
||||
DAG.getTargetConstant(WideElts - NumElts, DL, MVT::i8));
|
||||
DAG.getConstant(WideElts - NumElts, DL, MVT::i8));
|
||||
// Increase the shift amount to account for the left shift.
|
||||
ShiftAmt += WideElts - NumElts;
|
||||
}
|
||||
|
||||
Res = DAG.getNode(Opcode, DL, WideVT, Res,
|
||||
DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
|
||||
DAG.getConstant(ShiftAmt, DL, MVT::i8));
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
|
||||
DAG.getIntPtrConstant(0, DL));
|
||||
}
|
||||
|
@ -17333,7 +17331,7 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
|
|||
|
||||
// Use kshiftr instruction to move to the lower element.
|
||||
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec,
|
||||
DAG.getTargetConstant(IdxVal, dl, MVT::i8));
|
||||
DAG.getConstant(IdxVal, dl, MVT::i8));
|
||||
|
||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
|
@ -17561,7 +17559,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
|
|||
(Subtarget.hasAVX2() && EltVT == MVT::i32)) {
|
||||
SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
|
||||
return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec,
|
||||
DAG.getTargetConstant(1, dl, MVT::i8));
|
||||
DAG.getConstant(1, dl, MVT::i8));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -17637,7 +17635,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
|
|||
// Create this as a scalar to vector..
|
||||
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
|
||||
return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1,
|
||||
DAG.getTargetConstant(IdxVal << 4, dl, MVT::i8));
|
||||
DAG.getConstant(IdxVal << 4, dl, MVT::i8));
|
||||
}
|
||||
|
||||
// PINSR* works with constant index.
|
||||
|
@ -17723,7 +17721,7 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
|
|||
|
||||
// Shift to the LSB.
|
||||
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec,
|
||||
DAG.getTargetConstant(IdxVal, dl, MVT::i8));
|
||||
DAG.getConstant(IdxVal, dl, MVT::i8));
|
||||
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, Op.getValueType(), Vec,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
|
@ -18266,8 +18264,8 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
|
|||
APInt APIntShiftAmt;
|
||||
if (X86::isConstantSplat(Amt, APIntShiftAmt)) {
|
||||
uint64_t ShiftAmt = APIntShiftAmt.urem(VT.getScalarSizeInBits());
|
||||
return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, Op0,
|
||||
Op1, DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
|
||||
return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
|
||||
Op0, Op1, DAG.getConstant(ShiftAmt, DL, MVT::i8));
|
||||
}
|
||||
|
||||
return DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT,
|
||||
|
@ -18699,7 +18697,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
|
|||
// Low will be bitcasted right away, so do not bother bitcasting back to its
|
||||
// original type.
|
||||
Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast,
|
||||
VecCstLowBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8));
|
||||
VecCstLowBitcast, DAG.getConstant(0xaa, DL, MVT::i8));
|
||||
// uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16),
|
||||
// (uint4) 0x53000000, 0xaa);
|
||||
SDValue VecCstHighBitcast = DAG.getBitcast(VecI16VT, VecCstHigh);
|
||||
|
@ -18707,7 +18705,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
|
|||
// High will be bitcasted right away, so do not bother bitcasting back to
|
||||
// its original type.
|
||||
High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast,
|
||||
VecCstHighBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8));
|
||||
VecCstHighBitcast, DAG.getConstant(0xaa, DL, MVT::i8));
|
||||
} else {
|
||||
SDValue VecCstMask = DAG.getConstant(0xffff, DL, VecIntVT);
|
||||
// uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000;
|
||||
|
@ -20657,14 +20655,14 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
|
|||
}
|
||||
|
||||
SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1,
|
||||
DAG.getTargetConstant(CC0, dl, MVT::i8));
|
||||
DAG.getConstant(CC0, dl, MVT::i8));
|
||||
SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1,
|
||||
DAG.getTargetConstant(CC1, dl, MVT::i8));
|
||||
DAG.getConstant(CC1, dl, MVT::i8));
|
||||
Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
|
||||
} else {
|
||||
// Handle all other FP comparisons here.
|
||||
Cmp = DAG.getNode(Opc, dl, VT, Op0, Op1,
|
||||
DAG.getTargetConstant(SSECC, dl, MVT::i8));
|
||||
DAG.getConstant(SSECC, dl, MVT::i8));
|
||||
}
|
||||
|
||||
// If this is SSE/AVX CMPP, bitcast the result back to integer to match the
|
||||
|
@ -20727,7 +20725,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
|
|||
ISD::isUnsignedIntSetCC(Cond) ? X86ISD::VPCOMU : X86ISD::VPCOM;
|
||||
|
||||
return DAG.getNode(Opc, dl, VT, Op0, Op1,
|
||||
DAG.getTargetConstant(CmpMode, dl, MVT::i8));
|
||||
DAG.getConstant(CmpMode, dl, MVT::i8));
|
||||
}
|
||||
|
||||
// (X & Y) != 0 --> (X & Y) == Y iff Y is power-of-2.
|
||||
|
@ -21197,16 +21195,15 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
|||
cast<CondCodeSDNode>(Cond.getOperand(2))->get(), CondOp0, CondOp1);
|
||||
|
||||
if (Subtarget.hasAVX512()) {
|
||||
SDValue Cmp =
|
||||
DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0, CondOp1,
|
||||
DAG.getTargetConstant(SSECC, DL, MVT::i8));
|
||||
SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0,
|
||||
CondOp1, DAG.getConstant(SSECC, DL, MVT::i8));
|
||||
assert(!VT.isVector() && "Not a scalar type?");
|
||||
return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2);
|
||||
}
|
||||
|
||||
if (SSECC < 8 || Subtarget.hasAVX()) {
|
||||
SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1,
|
||||
DAG.getTargetConstant(SSECC, DL, MVT::i8));
|
||||
DAG.getConstant(SSECC, DL, MVT::i8));
|
||||
|
||||
// If we have AVX, we can use a variable vector select (VBLENDV) instead
|
||||
// of 3 logic instructions for size savings and potentially speed.
|
||||
|
@ -21664,7 +21661,7 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
|
|||
|
||||
unsigned SignExtShift = DestWidth - InSVT.getSizeInBits();
|
||||
SignExt = DAG.getNode(X86ISD::VSRAI, dl, DestVT, Curr,
|
||||
DAG.getTargetConstant(SignExtShift, dl, MVT::i8));
|
||||
DAG.getConstant(SignExtShift, dl, MVT::i8));
|
||||
}
|
||||
|
||||
if (VT == MVT::v2i64) {
|
||||
|
@ -22659,7 +22656,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
|
|||
}
|
||||
|
||||
return DAG.getNode(Opc, dl, VT, SrcOp,
|
||||
DAG.getTargetConstant(ShiftAmt, dl, MVT::i8));
|
||||
DAG.getConstant(ShiftAmt, dl, MVT::i8));
|
||||
}
|
||||
|
||||
/// Handle vector element shifts where the shift amount may or may not be a
|
||||
|
@ -22704,7 +22701,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
|
|||
ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
|
||||
MVT::v2i64, ShAmt);
|
||||
else {
|
||||
SDValue ByteShift = DAG.getTargetConstant(
|
||||
SDValue ByteShift = DAG.getConstant(
|
||||
(128 - AmtTy.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8);
|
||||
ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt);
|
||||
ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
|
||||
|
@ -23002,6 +22999,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
SDValue Src2 = Op.getOperand(2);
|
||||
SDValue Src3 = Op.getOperand(3);
|
||||
|
||||
if (IntrData->Type == INTR_TYPE_3OP_IMM8)
|
||||
Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3);
|
||||
|
||||
// We specify 2 possible opcodes for intrinsics with rounding modes.
|
||||
// First, we check if the intrinsic may have non-default rounding mode,
|
||||
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
|
||||
|
@ -23254,6 +23254,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
case CMP_MASK_CC: {
|
||||
MVT MaskVT = Op.getSimpleValueType();
|
||||
SDValue CC = Op.getOperand(3);
|
||||
CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CC);
|
||||
// We specify 2 possible opcodes for intrinsics with rounding modes.
|
||||
// First, we check if the intrinsic may have non-default rounding mode,
|
||||
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
|
||||
|
@ -23272,7 +23273,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
case CMP_MASK_SCALAR_CC: {
|
||||
SDValue Src1 = Op.getOperand(1);
|
||||
SDValue Src2 = Op.getOperand(2);
|
||||
SDValue CC = Op.getOperand(3);
|
||||
SDValue CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(3));
|
||||
SDValue Mask = Op.getOperand(4);
|
||||
|
||||
SDValue Cmp;
|
||||
|
@ -23343,10 +23344,10 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
SDValue FCmp;
|
||||
if (isRoundModeCurDirection(Sae))
|
||||
FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS,
|
||||
DAG.getTargetConstant(CondVal, dl, MVT::i8));
|
||||
DAG.getConstant(CondVal, dl, MVT::i8));
|
||||
else if (isRoundModeSAE(Sae))
|
||||
FCmp = DAG.getNode(X86ISD::FSETCCM_SAE, dl, MVT::v1i1, LHS, RHS,
|
||||
DAG.getTargetConstant(CondVal, dl, MVT::i8), Sae);
|
||||
DAG.getConstant(CondVal, dl, MVT::i8), Sae);
|
||||
else
|
||||
return SDValue();
|
||||
// Need to fill with zeros to ensure the bitcast will produce zeroes
|
||||
|
@ -23406,9 +23407,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
assert(IntrData->Opc0 == X86ISD::VRNDSCALE && "Unexpected opcode");
|
||||
// Clear the upper bits of the rounding immediate so that the legacy
|
||||
// intrinsic can't trigger the scaling behavior of VRNDSCALE.
|
||||
auto Round = cast<ConstantSDNode>(Op.getOperand(2));
|
||||
SDValue RoundingMode =
|
||||
DAG.getTargetConstant(Round->getZExtValue() & 0xf, dl, MVT::i32);
|
||||
SDValue RoundingMode = DAG.getNode(ISD::AND, dl, MVT::i32,
|
||||
Op.getOperand(2),
|
||||
DAG.getConstant(0xf, dl, MVT::i32));
|
||||
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
|
||||
Op.getOperand(1), RoundingMode);
|
||||
}
|
||||
|
@ -23416,9 +23417,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
assert(IntrData->Opc0 == X86ISD::VRNDSCALES && "Unexpected opcode");
|
||||
// Clear the upper bits of the rounding immediate so that the legacy
|
||||
// intrinsic can't trigger the scaling behavior of VRNDSCALE.
|
||||
auto Round = cast<ConstantSDNode>(Op.getOperand(3));
|
||||
SDValue RoundingMode =
|
||||
DAG.getTargetConstant(Round->getZExtValue() & 0xf, dl, MVT::i32);
|
||||
SDValue RoundingMode = DAG.getNode(ISD::AND, dl, MVT::i32,
|
||||
Op.getOperand(3),
|
||||
DAG.getConstant(0xf, dl, MVT::i32));
|
||||
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
|
||||
Op.getOperand(1), Op.getOperand(2), RoundingMode);
|
||||
}
|
||||
|
@ -26095,7 +26096,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
|
|||
(VT == MVT::v32i8 && Subtarget.hasInt256())) &&
|
||||
!Subtarget.hasXOP()) {
|
||||
int NumElts = VT.getVectorNumElements();
|
||||
SDValue Cst8 = DAG.getTargetConstant(8, dl, MVT::i8);
|
||||
SDValue Cst8 = DAG.getConstant(8, dl, MVT::i8);
|
||||
|
||||
// Extend constant shift amount to vXi16 (it doesn't matter if the type
|
||||
// isn't legal).
|
||||
|
@ -26367,7 +26368,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
|
|||
unsigned Op = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI);
|
||||
uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits);
|
||||
return DAG.getNode(Op, DL, VT, R,
|
||||
DAG.getTargetConstant(RotateAmt, DL, MVT::i8));
|
||||
DAG.getConstant(RotateAmt, DL, MVT::i8));
|
||||
}
|
||||
|
||||
// Else, fall-back on VPROLV/VPRORV.
|
||||
|
@ -26388,7 +26389,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
|
|||
if (0 <= CstSplatIndex) {
|
||||
uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits);
|
||||
return DAG.getNode(X86ISD::VROTLI, DL, VT, R,
|
||||
DAG.getTargetConstant(RotateAmt, DL, MVT::i8));
|
||||
DAG.getConstant(RotateAmt, DL, MVT::i8));
|
||||
}
|
||||
|
||||
// Use general rotate by variable (per-element).
|
||||
|
@ -26625,7 +26626,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
|
|||
|
||||
// If this is a canonical idempotent atomicrmw w/no uses, we have a better
|
||||
// lowering available in lowerAtomicArith.
|
||||
// TODO: push more cases through this path.
|
||||
// TODO: push more cases through this path.
|
||||
if (auto *C = dyn_cast<ConstantInt>(AI->getValOperand()))
|
||||
if (AI->getOperation() == AtomicRMWInst::Or && C->isZero() &&
|
||||
AI->use_empty())
|
||||
|
@ -26695,7 +26696,7 @@ bool X86TargetLowering::lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const {
|
|||
/// Emit a locked operation on a stack location which does not change any
|
||||
/// memory location, but does involve a lock prefix. Location is chosen to be
|
||||
/// a) very likely accessed only by a single thread to minimize cache traffic,
|
||||
/// and b) definitely dereferenceable. Returns the new Chain result.
|
||||
/// and b) definitely dereferenceable. Returns the new Chain result.
|
||||
static SDValue emitLockedStackOp(SelectionDAG &DAG,
|
||||
const X86Subtarget &Subtarget,
|
||||
SDValue Chain, SDLoc DL) {
|
||||
|
@ -26704,22 +26705,22 @@ static SDValue emitLockedStackOp(SelectionDAG &DAG,
|
|||
// operations issued by the current processor. As such, the location
|
||||
// referenced is not relevant for the ordering properties of the instruction.
|
||||
// See: Intel® 64 and IA-32 Architectures Software Developer’s Manual,
|
||||
// 8.2.3.9 Loads and Stores Are Not Reordered with Locked Instructions
|
||||
// 8.2.3.9 Loads and Stores Are Not Reordered with Locked Instructions
|
||||
// 2) Using an immediate operand appears to be the best encoding choice
|
||||
// here since it doesn't require an extra register.
|
||||
// 3) OR appears to be very slightly faster than ADD. (Though, the difference
|
||||
// is small enough it might just be measurement noise.)
|
||||
// 4) When choosing offsets, there are several contributing factors:
|
||||
// a) If there's no redzone, we default to TOS. (We could allocate a cache
|
||||
// line aligned stack object to improve this case.)
|
||||
// line aligned stack object to improve this case.)
|
||||
// b) To minimize our chances of introducing a false dependence, we prefer
|
||||
// to offset the stack usage from TOS slightly.
|
||||
// to offset the stack usage from TOS slightly.
|
||||
// c) To minimize concerns about cross thread stack usage - in particular,
|
||||
// the idiomatic MyThreadPool.run([&StackVars]() {...}) pattern which
|
||||
// captures state in the TOS frame and accesses it from many threads -
|
||||
// we want to use an offset such that the offset is in a distinct cache
|
||||
// line from the TOS frame.
|
||||
//
|
||||
//
|
||||
// For a general discussion of the tradeoffs and benchmark results, see:
|
||||
// https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
|
||||
|
||||
|
@ -26772,7 +26773,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
|
|||
if (Subtarget.hasMFence())
|
||||
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
|
||||
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
return emitLockedStackOp(DAG, Subtarget, Chain, dl);
|
||||
}
|
||||
|
||||
|
@ -27255,12 +27256,12 @@ static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
|
|||
// seq_cst which isn't SingleThread, everything just needs to be preserved
|
||||
// during codegen and then dropped. Note that we expect (but don't assume),
|
||||
// that orderings other than seq_cst and acq_rel have been canonicalized to
|
||||
// a store or load.
|
||||
// a store or load.
|
||||
if (AN->getOrdering() == AtomicOrdering::SequentiallyConsistent &&
|
||||
AN->getSyncScopeID() == SyncScope::System) {
|
||||
// Prefer a locked operation against a stack location to minimize cache
|
||||
// traffic. This assumes that stack locations are very likely to be
|
||||
// accessed only by the owning thread.
|
||||
// accessed only by the owning thread.
|
||||
SDValue NewChain = emitLockedStackOp(DAG, Subtarget, Chain, DL);
|
||||
assert(!N->hasAnyUseOfValue(0));
|
||||
// NOTE: The getUNDEF is needed to give something for the unused result 0.
|
||||
|
@ -32635,7 +32636,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
|
|||
Res = DAG.getBitcast(ShuffleVT, V1);
|
||||
Res = DAG.getNode(X86ISD::VPERM2X128, DL, ShuffleVT, Res,
|
||||
DAG.getUNDEF(ShuffleVT),
|
||||
DAG.getTargetConstant(PermMask, DL, MVT::i8));
|
||||
DAG.getConstant(PermMask, DL, MVT::i8));
|
||||
return DAG.getBitcast(RootVT, Res);
|
||||
}
|
||||
|
||||
|
@ -32742,7 +32743,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
|
|||
return SDValue(); // Nothing to do!
|
||||
Res = DAG.getBitcast(ShuffleVT, V1);
|
||||
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res,
|
||||
DAG.getTargetConstant(PermuteImm, DL, MVT::i8));
|
||||
DAG.getConstant(PermuteImm, DL, MVT::i8));
|
||||
return DAG.getBitcast(RootVT, Res);
|
||||
}
|
||||
}
|
||||
|
@ -32772,7 +32773,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
|
|||
NewV1 = DAG.getBitcast(ShuffleVT, NewV1);
|
||||
NewV2 = DAG.getBitcast(ShuffleVT, NewV2);
|
||||
Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2,
|
||||
DAG.getTargetConstant(PermuteImm, DL, MVT::i8));
|
||||
DAG.getConstant(PermuteImm, DL, MVT::i8));
|
||||
return DAG.getBitcast(RootVT, Res);
|
||||
}
|
||||
|
||||
|
@ -32789,8 +32790,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
|
|||
return SDValue(); // Nothing to do!
|
||||
V1 = DAG.getBitcast(IntMaskVT, V1);
|
||||
Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1,
|
||||
DAG.getTargetConstant(BitLen, DL, MVT::i8),
|
||||
DAG.getTargetConstant(BitIdx, DL, MVT::i8));
|
||||
DAG.getConstant(BitLen, DL, MVT::i8),
|
||||
DAG.getConstant(BitIdx, DL, MVT::i8));
|
||||
return DAG.getBitcast(RootVT, Res);
|
||||
}
|
||||
|
||||
|
@ -32800,8 +32801,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
|
|||
V1 = DAG.getBitcast(IntMaskVT, V1);
|
||||
V2 = DAG.getBitcast(IntMaskVT, V2);
|
||||
Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2,
|
||||
DAG.getTargetConstant(BitLen, DL, MVT::i8),
|
||||
DAG.getTargetConstant(BitIdx, DL, MVT::i8));
|
||||
DAG.getConstant(BitLen, DL, MVT::i8),
|
||||
DAG.getConstant(BitIdx, DL, MVT::i8));
|
||||
return DAG.getBitcast(RootVT, Res);
|
||||
}
|
||||
}
|
||||
|
@ -32965,7 +32966,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
|
|||
V2 = DAG.getBitcast(MaskVT, V2);
|
||||
SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, IntMaskVT, DAG, DL, true);
|
||||
Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp,
|
||||
DAG.getTargetConstant(M2ZImm, DL, MVT::i8));
|
||||
DAG.getConstant(M2ZImm, DL, MVT::i8));
|
||||
return DAG.getBitcast(RootVT, Res);
|
||||
}
|
||||
|
||||
|
@ -33784,7 +33785,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
|
|||
return DAG.getBitcast(
|
||||
VT, DAG.getNode(X86ISD::BLENDI, DL, SrcVT, N0.getOperand(0),
|
||||
N1.getOperand(0),
|
||||
DAG.getTargetConstant(BlendMask, DL, MVT::i8)));
|
||||
DAG.getConstant(BlendMask, DL, MVT::i8)));
|
||||
}
|
||||
}
|
||||
return SDValue();
|
||||
|
@ -33852,12 +33853,12 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
|
|||
// If we zero out all elements from Op0 then we don't need to reference it.
|
||||
if (((ZeroMask | (1u << DstIdx)) == 0xF) && !Op0.isUndef())
|
||||
return DAG.getNode(X86ISD::INSERTPS, DL, VT, DAG.getUNDEF(VT), Op1,
|
||||
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
|
||||
DAG.getConstant(InsertPSMask, DL, MVT::i8));
|
||||
|
||||
// If we zero out the element from Op1 then we don't need to reference it.
|
||||
if ((ZeroMask & (1u << DstIdx)) && !Op1.isUndef())
|
||||
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT),
|
||||
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
|
||||
DAG.getConstant(InsertPSMask, DL, MVT::i8));
|
||||
|
||||
// Attempt to merge insertps Op1 with an inner target shuffle node.
|
||||
SmallVector<int, 8> TargetMask1;
|
||||
|
@ -33868,14 +33869,14 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
|
|||
// Zero/UNDEF insertion - zero out element and remove dependency.
|
||||
InsertPSMask |= (1u << DstIdx);
|
||||
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT),
|
||||
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
|
||||
DAG.getConstant(InsertPSMask, DL, MVT::i8));
|
||||
}
|
||||
// Update insertps mask srcidx and reference the source input directly.
|
||||
assert(0 <= M && M < 8 && "Shuffle index out of range");
|
||||
InsertPSMask = (InsertPSMask & 0x3f) | ((M & 0x3) << 6);
|
||||
Op1 = Ops1[M < 4 ? 0 : 1];
|
||||
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1,
|
||||
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
|
||||
DAG.getConstant(InsertPSMask, DL, MVT::i8));
|
||||
}
|
||||
|
||||
// Attempt to merge insertps Op0 with an inner target shuffle node.
|
||||
|
@ -33918,7 +33919,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
|
|||
|
||||
if (Updated)
|
||||
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1,
|
||||
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
|
||||
DAG.getConstant(InsertPSMask, DL, MVT::i8));
|
||||
}
|
||||
|
||||
// If we're inserting an element from a vbroadcast of a load, fold the
|
||||
|
@ -33931,7 +33932,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
|
|||
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0,
|
||||
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
|
||||
Op1.getOperand(0)),
|
||||
DAG.getTargetConstant(InsertPSMask & 0x3f, DL, MVT::i8));
|
||||
DAG.getConstant(InsertPSMask & 0x3f, DL, MVT::i8));
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
@ -34665,7 +34666,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
|
|||
}
|
||||
|
||||
SDLoc dl(Op);
|
||||
SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8);
|
||||
SDValue NewSA = TLO.DAG.getConstant(Diff, dl, MVT::i8);
|
||||
return TLO.CombineTo(
|
||||
Op, TLO.DAG.getNode(Opc, dl, VT, Src.getOperand(0), NewSA));
|
||||
}
|
||||
|
@ -34704,7 +34705,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
|
|||
}
|
||||
|
||||
SDLoc dl(Op);
|
||||
SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8);
|
||||
SDValue NewSA = TLO.DAG.getConstant(Diff, dl, MVT::i8);
|
||||
return TLO.CombineTo(
|
||||
Op, TLO.DAG.getNode(Opc, dl, VT, Src.getOperand(0), NewSA));
|
||||
}
|
||||
|
@ -35753,8 +35754,8 @@ static SDValue createMMXBuildVector(BuildVectorSDNode *BV, SelectionDAG &DAG,
|
|||
unsigned ShufMask = (NumElts > 2 ? 0 : 0x44);
|
||||
return DAG.getNode(
|
||||
ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx,
|
||||
DAG.getTargetConstant(Intrinsic::x86_sse_pshuf_w, DL, MVT::i32),
|
||||
Splat, DAG.getTargetConstant(ShufMask, DL, MVT::i8));
|
||||
DAG.getConstant(Intrinsic::x86_sse_pshuf_w, DL, MVT::i32), Splat,
|
||||
DAG.getConstant(ShufMask, DL, MVT::i8));
|
||||
}
|
||||
Ops.append(NumElts, Splat);
|
||||
} else {
|
||||
|
@ -36510,7 +36511,7 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
|
|||
}
|
||||
|
||||
// TODO: This switch could include FNEG and the x86-specific FP logic ops
|
||||
// (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid
|
||||
// (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid
|
||||
// missed load folding and fma+fneg combining.
|
||||
switch (Vec.getOpcode()) {
|
||||
case ISD::FMA: // Begin 3 operands
|
||||
|
@ -38911,7 +38912,7 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
|
|||
if (NewShiftVal >= NumBitsPerElt)
|
||||
NewShiftVal = NumBitsPerElt - 1;
|
||||
return DAG.getNode(X86ISD::VSRAI, SDLoc(N), VT, N0.getOperand(0),
|
||||
DAG.getTargetConstant(NewShiftVal, SDLoc(N), MVT::i8));
|
||||
DAG.getConstant(NewShiftVal, SDLoc(N), MVT::i8));
|
||||
}
|
||||
|
||||
// We can decode 'whole byte' logical bit shifts as shuffles.
|
||||
|
@ -39031,7 +39032,7 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
|
|||
if (Subtarget.hasAVX512()) {
|
||||
SDValue FSetCC =
|
||||
DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01,
|
||||
DAG.getTargetConstant(x86cc, DL, MVT::i8));
|
||||
DAG.getConstant(x86cc, DL, MVT::i8));
|
||||
// Need to fill with zeros to ensure the bitcast will produce zeroes
|
||||
// for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that.
|
||||
SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v16i1,
|
||||
|
@ -39040,9 +39041,10 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
|
|||
return DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Ins), DL,
|
||||
N->getSimpleValueType(0));
|
||||
}
|
||||
SDValue OnesOrZeroesF =
|
||||
DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00,
|
||||
CMP01, DAG.getTargetConstant(x86cc, DL, MVT::i8));
|
||||
SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL,
|
||||
CMP00.getValueType(), CMP00, CMP01,
|
||||
DAG.getConstant(x86cc, DL,
|
||||
MVT::i8));
|
||||
|
||||
bool is64BitFP = (CMP00.getValueType() == MVT::f64);
|
||||
MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32;
|
||||
|
@ -39236,7 +39238,7 @@ static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
|
|||
|
||||
SDLoc DL(N);
|
||||
unsigned ShiftVal = SplatVal.countTrailingOnes();
|
||||
SDValue ShAmt = DAG.getTargetConstant(EltBitWidth - ShiftVal, DL, MVT::i8);
|
||||
SDValue ShAmt = DAG.getConstant(EltBitWidth - ShiftVal, DL, MVT::i8);
|
||||
SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT0, Op0, ShAmt);
|
||||
return DAG.getBitcast(N->getValueType(0), Shift);
|
||||
}
|
||||
|
|
|
@ -753,14 +753,14 @@ let isCommutable = 1 in
|
|||
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
|
||||
(ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
|
||||
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||
[(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
|
||||
[(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
|
||||
EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
|
||||
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
|
||||
(ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
|
||||
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||
[(set VR128X:$dst, (X86insertps VR128X:$src1,
|
||||
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
|
||||
timm:$src3))]>,
|
||||
imm:$src3))]>,
|
||||
EVEX_4V, EVEX_CD8<32, CD8VT1>,
|
||||
Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
|
||||
}
|
||||
|
@ -2054,9 +2054,9 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
|
|||
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
|
||||
"vcmp"#_.Suffix,
|
||||
"$cc, $src2, $src1", "$src1, $src2, $cc",
|
||||
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
|
||||
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
|
||||
(OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
|
||||
timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>;
|
||||
imm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>;
|
||||
let mayLoad = 1 in
|
||||
defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
|
||||
(outs _.KRC:$dst),
|
||||
|
@ -2064,9 +2064,9 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
|
|||
"vcmp"#_.Suffix,
|
||||
"$cc, $src2, $src1", "$src1, $src2, $cc",
|
||||
(OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
|
||||
timm:$cc),
|
||||
imm:$cc),
|
||||
(OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
|
||||
timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
|
||||
imm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
|
||||
defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
|
||||
|
@ -2075,9 +2075,9 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
|
|||
"vcmp"#_.Suffix,
|
||||
"$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
|
||||
(OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
|
||||
timm:$cc),
|
||||
imm:$cc),
|
||||
(OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
|
||||
timm:$cc)>,
|
||||
imm:$cc)>,
|
||||
EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
|
||||
|
||||
let isCodeGenOnly = 1 in {
|
||||
|
@ -2088,7 +2088,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
|
|||
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
|
||||
[(set _.KRC:$dst, (OpNode _.FRC:$src1,
|
||||
_.FRC:$src2,
|
||||
timm:$cc))]>,
|
||||
imm:$cc))]>,
|
||||
EVEX_4V, VEX_LIG, Sched<[sched]>;
|
||||
def rm : AVX512Ii8<0xC2, MRMSrcMem,
|
||||
(outs _.KRC:$dst),
|
||||
|
@ -2097,7 +2097,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
|
|||
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
|
||||
[(set _.KRC:$dst, (OpNode _.FRC:$src1,
|
||||
(_.ScalarLdFrag addr:$src2),
|
||||
timm:$cc))]>,
|
||||
imm:$cc))]>,
|
||||
EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
|
@ -2530,8 +2530,8 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
|
|||
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
|
||||
"vcmp"#_.Suffix,
|
||||
"$cc, $src2, $src1", "$src1, $src2, $cc",
|
||||
(X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
|
||||
(X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
|
||||
(X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
|
||||
(X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
|
||||
1>, Sched<[sched]>;
|
||||
|
||||
defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
|
||||
|
@ -2539,9 +2539,9 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
|
|||
"vcmp"#_.Suffix,
|
||||
"$cc, $src2, $src1", "$src1, $src2, $cc",
|
||||
(X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
|
||||
timm:$cc),
|
||||
imm:$cc),
|
||||
(X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
|
||||
timm:$cc)>,
|
||||
imm:$cc)>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
|
||||
defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
|
||||
|
@ -2552,10 +2552,10 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
|
|||
"$src1, ${src2}"#_.BroadcastStr#", $cc",
|
||||
(X86cmpm (_.VT _.RC:$src1),
|
||||
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
|
||||
timm:$cc),
|
||||
imm:$cc),
|
||||
(X86cmpm_su (_.VT _.RC:$src1),
|
||||
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
|
||||
timm:$cc)>,
|
||||
imm:$cc)>,
|
||||
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
|
||||
// Patterns for selecting with loads in other operand.
|
||||
|
@ -2592,9 +2592,9 @@ multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
|
|||
"vcmp"#_.Suffix,
|
||||
"$cc, {sae}, $src2, $src1",
|
||||
"$src1, $src2, {sae}, $cc",
|
||||
(X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
|
||||
(X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
|
||||
(X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
|
||||
timm:$cc)>,
|
||||
imm:$cc)>,
|
||||
EVEX_B, Sched<[sched]>;
|
||||
}
|
||||
|
||||
|
@ -2649,7 +2649,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
|
|||
(ins _.RC:$src1, i32u8imm:$src2),
|
||||
OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
|
||||
(i32 timm:$src2)))]>,
|
||||
(i32 imm:$src2)))]>,
|
||||
Sched<[sched]>;
|
||||
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
|
||||
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
|
||||
|
@ -2657,7 +2657,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
|
|||
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
|
||||
[(set _.KRC:$dst,(and _.KRCWM:$mask,
|
||||
(X86Vfpclasss_su (_.VT _.RC:$src1),
|
||||
(i32 timm:$src2))))]>,
|
||||
(i32 imm:$src2))))]>,
|
||||
EVEX_K, Sched<[sched]>;
|
||||
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
|
||||
(ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
|
||||
|
@ -2665,7 +2665,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
|
|||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set _.KRC:$dst,
|
||||
(X86Vfpclasss _.ScalarIntMemCPat:$src1,
|
||||
(i32 timm:$src2)))]>,
|
||||
(i32 imm:$src2)))]>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
|
||||
(ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
|
||||
|
@ -2673,7 +2673,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
|
|||
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
|
||||
[(set _.KRC:$dst,(and _.KRCWM:$mask,
|
||||
(X86Vfpclasss_su _.ScalarIntMemCPat:$src1,
|
||||
(i32 timm:$src2))))]>,
|
||||
(i32 imm:$src2))))]>,
|
||||
EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
}
|
||||
|
@ -2689,7 +2689,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
|
|||
(ins _.RC:$src1, i32u8imm:$src2),
|
||||
OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
|
||||
(i32 timm:$src2)))]>,
|
||||
(i32 imm:$src2)))]>,
|
||||
Sched<[sched]>;
|
||||
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
|
||||
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
|
||||
|
@ -2697,7 +2697,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
|
|||
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
|
||||
[(set _.KRC:$dst,(and _.KRCWM:$mask,
|
||||
(X86Vfpclass_su (_.VT _.RC:$src1),
|
||||
(i32 timm:$src2))))]>,
|
||||
(i32 imm:$src2))))]>,
|
||||
EVEX_K, Sched<[sched]>;
|
||||
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
|
||||
(ins _.MemOp:$src1, i32u8imm:$src2),
|
||||
|
@ -2705,7 +2705,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
|
|||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set _.KRC:$dst,(X86Vfpclass
|
||||
(_.VT (_.LdFrag addr:$src1)),
|
||||
(i32 timm:$src2)))]>,
|
||||
(i32 imm:$src2)))]>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
|
||||
(ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
|
||||
|
@ -2713,7 +2713,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
|
|||
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
|
||||
[(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
|
||||
(_.VT (_.LdFrag addr:$src1)),
|
||||
(i32 timm:$src2))))]>,
|
||||
(i32 imm:$src2))))]>,
|
||||
EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
|
||||
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
|
||||
|
@ -2723,7 +2723,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
|
|||
[(set _.KRC:$dst,(X86Vfpclass
|
||||
(_.VT (X86VBroadcast
|
||||
(_.ScalarLdFrag addr:$src1))),
|
||||
(i32 timm:$src2)))]>,
|
||||
(i32 imm:$src2)))]>,
|
||||
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
|
||||
(ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
|
||||
|
@ -2733,7 +2733,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
|
|||
[(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
|
||||
(_.VT (X86VBroadcast
|
||||
(_.ScalarLdFrag addr:$src1))),
|
||||
(i32 timm:$src2))))]>,
|
||||
(i32 imm:$src2))))]>,
|
||||
EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
|
||||
|
@ -3111,7 +3111,7 @@ multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
|
|||
def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$imm, $src, $dst|$dst, $src, $imm}"),
|
||||
[(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
|
||||
[(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>,
|
||||
Sched<[sched]>;
|
||||
}
|
||||
|
||||
|
@ -3187,7 +3187,7 @@ multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, PatFrag OpNode_su
|
|||
X86VectorVTInfo Narrow,
|
||||
X86VectorVTInfo Wide> {
|
||||
def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
|
||||
(Narrow.VT Narrow.RC:$src2), timm:$cc)),
|
||||
(Narrow.VT Narrow.RC:$src2), imm:$cc)),
|
||||
(COPY_TO_REGCLASS
|
||||
(!cast<Instruction>(InstStr##Zrri)
|
||||
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
|
||||
|
@ -3196,7 +3196,7 @@ def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
|
|||
|
||||
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
|
||||
(OpNode_su (Narrow.VT Narrow.RC:$src1),
|
||||
(Narrow.VT Narrow.RC:$src2), timm:$cc))),
|
||||
(Narrow.VT Narrow.RC:$src2), imm:$cc))),
|
||||
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
|
||||
(COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
|
||||
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
|
||||
|
@ -5787,13 +5787,13 @@ multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
|
|||
defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, u8imm:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
|
||||
(_.VT (OpNode _.RC:$src1, (i8 imm:$src2)))>,
|
||||
Sched<[sched]>;
|
||||
defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
|
||||
(ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
|
||||
(i8 timm:$src2)))>,
|
||||
(i8 imm:$src2)))>,
|
||||
Sched<[sched.Folded]>;
|
||||
}
|
||||
}
|
||||
|
@ -5805,7 +5805,7 @@ multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
|
|||
defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
|
||||
(ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
|
||||
"$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
|
||||
(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 timm:$src2)))>,
|
||||
(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2)))>,
|
||||
EVEX_B, Sched<[sched.Folded]>;
|
||||
}
|
||||
|
||||
|
@ -5947,13 +5947,13 @@ let Predicates = [HasAVX512, NoVLX] in {
|
|||
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
|
||||
VR128X:$src2)), sub_xmm)>;
|
||||
|
||||
def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
|
||||
def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
|
||||
(EXTRACT_SUBREG (v8i64
|
||||
(VPSRAQZri
|
||||
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
imm:$src2)), sub_ymm)>;
|
||||
|
||||
def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
|
||||
def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
|
||||
(EXTRACT_SUBREG (v8i64
|
||||
(VPSRAQZri
|
||||
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
|
||||
|
@ -6098,23 +6098,23 @@ let Predicates = [HasAVX512, NoVLX] in {
|
|||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
|
||||
sub_ymm)>;
|
||||
|
||||
def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
|
||||
def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
|
||||
(EXTRACT_SUBREG (v8i64
|
||||
(VPROLQZri
|
||||
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
|
||||
imm:$src2)), sub_xmm)>;
|
||||
def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
|
||||
def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
|
||||
(EXTRACT_SUBREG (v8i64
|
||||
(VPROLQZri
|
||||
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
imm:$src2)), sub_ymm)>;
|
||||
|
||||
def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
|
||||
def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
|
||||
(EXTRACT_SUBREG (v16i32
|
||||
(VPROLDZri
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
|
||||
imm:$src2)), sub_xmm)>;
|
||||
def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
|
||||
def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
|
||||
(EXTRACT_SUBREG (v16i32
|
||||
(VPROLDZri
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
|
@ -6149,23 +6149,23 @@ let Predicates = [HasAVX512, NoVLX] in {
|
|||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
|
||||
sub_ymm)>;
|
||||
|
||||
def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
|
||||
def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
|
||||
(EXTRACT_SUBREG (v8i64
|
||||
(VPRORQZri
|
||||
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
|
||||
imm:$src2)), sub_xmm)>;
|
||||
def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
|
||||
def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
|
||||
(EXTRACT_SUBREG (v8i64
|
||||
(VPRORQZri
|
||||
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
imm:$src2)), sub_ymm)>;
|
||||
|
||||
def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
|
||||
def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
|
||||
(EXTRACT_SUBREG (v16i32
|
||||
(VPRORDZri
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
|
||||
imm:$src2)), sub_xmm)>;
|
||||
def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
|
||||
def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
|
||||
(EXTRACT_SUBREG (v16i32
|
||||
(VPRORDZri
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
|
||||
|
@ -8612,21 +8612,21 @@ let ExeDomain = GenericDomain in {
|
|||
(ins _src.RC:$src1, i32u8imm:$src2),
|
||||
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set _dest.RC:$dst,
|
||||
(X86cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
|
||||
(X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)))]>,
|
||||
Sched<[RR]>;
|
||||
let Constraints = "$src0 = $dst" in
|
||||
def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
|
||||
(ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
|
||||
"vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
|
||||
[(set _dest.RC:$dst,
|
||||
(X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
|
||||
(X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
|
||||
_dest.RC:$src0, _src.KRCWM:$mask))]>,
|
||||
Sched<[RR]>, EVEX_K;
|
||||
def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
|
||||
(ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
|
||||
"vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
|
||||
[(set _dest.RC:$dst,
|
||||
(X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
|
||||
(X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
|
||||
_dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
|
||||
Sched<[RR]>, EVEX_KZ;
|
||||
let hasSideEffects = 0, mayStore = 1 in {
|
||||
|
@ -9085,14 +9085,14 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
|
|||
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
|
||||
"$src3, $src2, $src1", "$src1, $src2, $src3",
|
||||
(_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
|
||||
(i32 timm:$src3)))>,
|
||||
(i32 imm:$src3)))>,
|
||||
Sched<[sched]>;
|
||||
|
||||
defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
|
||||
"$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
|
||||
(_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
|
||||
(i32 timm:$src3)))>, EVEX_B,
|
||||
(i32 imm:$src3)))>, EVEX_B,
|
||||
Sched<[sched]>;
|
||||
|
||||
defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
|
@ -9100,7 +9100,7 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
|
|||
OpcodeStr,
|
||||
"$src3, $src2, $src1", "$src1, $src2, $src3",
|
||||
(_.VT (X86RndScales _.RC:$src1,
|
||||
_.ScalarIntMemCPat:$src2, (i32 timm:$src3)))>,
|
||||
_.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
|
||||
let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
|
||||
|
@ -9118,13 +9118,13 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
|
|||
}
|
||||
|
||||
let Predicates = [HasAVX512] in {
|
||||
def : Pat<(X86VRndScale _.FRC:$src1, timm:$src2),
|
||||
def : Pat<(X86VRndScale _.FRC:$src1, imm:$src2),
|
||||
(_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
|
||||
_.FRC:$src1, imm:$src2))>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX512, OptForSize] in {
|
||||
def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
|
||||
def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), imm:$src2),
|
||||
(_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
|
||||
addr:$src1, imm:$src2))>;
|
||||
}
|
||||
|
@ -10145,19 +10145,19 @@ multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNo
|
|||
(ins _.RC:$src1, i32u8imm:$src2),
|
||||
OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
|
||||
(OpNode (_.VT _.RC:$src1),
|
||||
(i32 timm:$src2))>, Sched<[sched]>;
|
||||
(i32 imm:$src2))>, Sched<[sched]>;
|
||||
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.MemOp:$src1, i32u8imm:$src2),
|
||||
OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
|
||||
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
|
||||
(i32 timm:$src2))>,
|
||||
(i32 imm:$src2))>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
|
||||
OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
|
||||
"${src1}"##_.BroadcastStr##", $src2",
|
||||
(OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
|
||||
(i32 timm:$src2))>, EVEX_B,
|
||||
(i32 imm:$src2))>, EVEX_B,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
}
|
||||
|
@ -10172,7 +10172,7 @@ multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
|
|||
OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
|
||||
"$src1, {sae}, $src2",
|
||||
(OpNode (_.VT _.RC:$src1),
|
||||
(i32 timm:$src2))>,
|
||||
(i32 imm:$src2))>,
|
||||
EVEX_B, Sched<[sched]>;
|
||||
}
|
||||
|
||||
|
@ -10205,14 +10205,14 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
|
||||
(OpNode (_.VT _.RC:$src1),
|
||||
(_.VT _.RC:$src2),
|
||||
(i32 timm:$src3))>,
|
||||
(i32 imm:$src3))>,
|
||||
Sched<[sched]>;
|
||||
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
|
||||
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
|
||||
(OpNode (_.VT _.RC:$src1),
|
||||
(_.VT (bitconvert (_.LdFrag addr:$src2))),
|
||||
(i32 timm:$src3))>,
|
||||
(i32 imm:$src3))>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
|
||||
|
@ -10220,7 +10220,7 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
"$src1, ${src2}"##_.BroadcastStr##", $src3",
|
||||
(OpNode (_.VT _.RC:$src1),
|
||||
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
|
||||
(i32 timm:$src3))>, EVEX_B,
|
||||
(i32 imm:$src3))>, EVEX_B,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
}
|
||||
|
@ -10236,7 +10236,7 @@ multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
|
||||
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
|
||||
(SrcInfo.VT SrcInfo.RC:$src2),
|
||||
(i8 timm:$src3)))>,
|
||||
(i8 imm:$src3)))>,
|
||||
Sched<[sched]>;
|
||||
defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
|
||||
(ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
|
||||
|
@ -10244,7 +10244,7 @@ multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
|
||||
(SrcInfo.VT (bitconvert
|
||||
(SrcInfo.LdFrag addr:$src2))),
|
||||
(i8 timm:$src3)))>,
|
||||
(i8 imm:$src3)))>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
}
|
||||
|
@ -10263,7 +10263,7 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
"$src1, ${src2}"##_.BroadcastStr##", $src3",
|
||||
(OpNode (_.VT _.RC:$src1),
|
||||
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
|
||||
(i8 timm:$src3))>, EVEX_B,
|
||||
(i8 imm:$src3))>, EVEX_B,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
|
||||
|
@ -10277,7 +10277,7 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
|
||||
(OpNode (_.VT _.RC:$src1),
|
||||
(_.VT _.RC:$src2),
|
||||
(i32 timm:$src3))>,
|
||||
(i32 imm:$src3))>,
|
||||
Sched<[sched]>;
|
||||
defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
|
||||
|
@ -10301,7 +10301,7 @@ multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
|
|||
"$src1, $src2, {sae}, $src3",
|
||||
(OpNode (_.VT _.RC:$src1),
|
||||
(_.VT _.RC:$src2),
|
||||
(i32 timm:$src3))>,
|
||||
(i32 imm:$src3))>,
|
||||
EVEX_B, Sched<[sched]>;
|
||||
}
|
||||
|
||||
|
@ -10315,7 +10315,7 @@ multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode
|
|||
"$src1, $src2, {sae}, $src3",
|
||||
(OpNode (_.VT _.RC:$src1),
|
||||
(_.VT _.RC:$src2),
|
||||
(i32 timm:$src3))>,
|
||||
(i32 imm:$src3))>,
|
||||
EVEX_B, Sched<[sched]>;
|
||||
}
|
||||
|
||||
|
@ -10437,7 +10437,7 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
|
|||
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
|
||||
(_.VT (bitconvert
|
||||
(CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
|
||||
(i8 timm:$src3)))))>,
|
||||
(i8 imm:$src3)))))>,
|
||||
Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
|
||||
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
|
||||
|
@ -10446,7 +10446,7 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
|
|||
(bitconvert
|
||||
(CastInfo.VT (X86Shuf128 _.RC:$src1,
|
||||
(CastInfo.LdFrag addr:$src2),
|
||||
(i8 timm:$src3)))))>,
|
||||
(i8 imm:$src3)))))>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>,
|
||||
EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
|
||||
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
|
@ -10458,7 +10458,7 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
|
|||
(CastInfo.VT
|
||||
(X86Shuf128 _.RC:$src1,
|
||||
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
|
||||
(i8 timm:$src3)))))>, EVEX_B,
|
||||
(i8 imm:$src3)))))>, EVEX_B,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
}
|
||||
|
@ -10527,14 +10527,14 @@ multiclass avx512_valign<bits<8> opc, string OpcodeStr,
|
|||
defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
|
||||
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
|
||||
(_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
|
||||
(_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$src3)))>,
|
||||
Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
|
||||
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
|
||||
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
|
||||
(_.VT (X86VAlign _.RC:$src1,
|
||||
(bitconvert (_.LdFrag addr:$src2)),
|
||||
(i8 timm:$src3)))>,
|
||||
(i8 imm:$src3)))>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>,
|
||||
EVEX2VEXOverride<"VPALIGNRrmi">;
|
||||
|
||||
|
@ -10544,7 +10544,7 @@ multiclass avx512_valign<bits<8> opc, string OpcodeStr,
|
|||
"$src1, ${src2}"##_.BroadcastStr##", $src3",
|
||||
(X86VAlign _.RC:$src1,
|
||||
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
|
||||
(i8 timm:$src3))>, EVEX_B,
|
||||
(i8 imm:$src3))>, EVEX_B,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
}
|
||||
|
@ -10593,7 +10593,7 @@ multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
|
|||
def : Pat<(To.VT (vselect To.KRCWM:$mask,
|
||||
(bitconvert
|
||||
(From.VT (OpNode From.RC:$src1, From.RC:$src2,
|
||||
timm:$src3))),
|
||||
imm:$src3))),
|
||||
To.RC:$src0)),
|
||||
(!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
|
||||
To.RC:$src1, To.RC:$src2,
|
||||
|
@ -10602,7 +10602,7 @@ multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
|
|||
def : Pat<(To.VT (vselect To.KRCWM:$mask,
|
||||
(bitconvert
|
||||
(From.VT (OpNode From.RC:$src1, From.RC:$src2,
|
||||
timm:$src3))),
|
||||
imm:$src3))),
|
||||
To.ImmAllZerosV)),
|
||||
(!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
|
||||
To.RC:$src1, To.RC:$src2,
|
||||
|
@ -10612,7 +10612,7 @@ multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
|
|||
(bitconvert
|
||||
(From.VT (OpNode From.RC:$src1,
|
||||
(From.LdFrag addr:$src2),
|
||||
timm:$src3))),
|
||||
imm:$src3))),
|
||||
To.RC:$src0)),
|
||||
(!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
|
||||
To.RC:$src1, addr:$src2,
|
||||
|
@ -10622,7 +10622,7 @@ multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
|
|||
(bitconvert
|
||||
(From.VT (OpNode From.RC:$src1,
|
||||
(From.LdFrag addr:$src2),
|
||||
timm:$src3))),
|
||||
imm:$src3))),
|
||||
To.ImmAllZerosV)),
|
||||
(!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
|
||||
To.RC:$src1, addr:$src2,
|
||||
|
@ -10637,7 +10637,7 @@ multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
|
|||
def : Pat<(From.VT (OpNode From.RC:$src1,
|
||||
(bitconvert (To.VT (X86VBroadcast
|
||||
(To.ScalarLdFrag addr:$src2)))),
|
||||
timm:$src3)),
|
||||
imm:$src3)),
|
||||
(!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
|
||||
(ImmXForm imm:$src3))>;
|
||||
|
||||
|
@ -10647,7 +10647,7 @@ multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
|
|||
(bitconvert
|
||||
(To.VT (X86VBroadcast
|
||||
(To.ScalarLdFrag addr:$src2)))),
|
||||
timm:$src3))),
|
||||
imm:$src3))),
|
||||
To.RC:$src0)),
|
||||
(!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
|
||||
To.RC:$src1, addr:$src2,
|
||||
|
@ -10659,7 +10659,7 @@ multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
|
|||
(bitconvert
|
||||
(To.VT (X86VBroadcast
|
||||
(To.ScalarLdFrag addr:$src2)))),
|
||||
timm:$src3))),
|
||||
imm:$src3))),
|
||||
To.ImmAllZerosV)),
|
||||
(!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
|
||||
To.RC:$src1, addr:$src2,
|
||||
|
@ -11103,14 +11103,14 @@ multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
|
|||
def rr : AVX512<opc, MRMr,
|
||||
(outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
|
||||
[(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
|
||||
Sched<[sched]>;
|
||||
def rm : AVX512<opc, MRMm,
|
||||
(outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set _.RC:$dst,(_.VT (OpNode
|
||||
(_.VT (bitconvert (_.LdFrag addr:$src1))),
|
||||
(i8 timm:$src2))))]>,
|
||||
(i8 imm:$src2))))]>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
|
||||
|
@ -11243,7 +11243,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
(OpNode (_.VT _.RC:$src1),
|
||||
(_.VT _.RC:$src2),
|
||||
(_.VT _.RC:$src3),
|
||||
(i8 timm:$src4)), 1, 1>,
|
||||
(i8 imm:$src4)), 1, 1>,
|
||||
AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
|
||||
defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
|
||||
|
@ -11251,7 +11251,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
(OpNode (_.VT _.RC:$src1),
|
||||
(_.VT _.RC:$src2),
|
||||
(_.VT (bitconvert (_.LdFrag addr:$src3))),
|
||||
(i8 timm:$src4)), 1, 0>,
|
||||
(i8 imm:$src4)), 1, 0>,
|
||||
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
|
@ -11261,31 +11261,31 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
(OpNode (_.VT _.RC:$src1),
|
||||
(_.VT _.RC:$src2),
|
||||
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
|
||||
(i8 timm:$src4)), 1, 0>, EVEX_B,
|
||||
(i8 imm:$src4)), 1, 0>, EVEX_B,
|
||||
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}// Constraints = "$src1 = $dst"
|
||||
|
||||
// Additional patterns for matching passthru operand in other positions.
|
||||
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
|
||||
(OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
|
||||
_.RC:$src1)),
|
||||
(!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
|
||||
_.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
|
||||
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
|
||||
(OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
|
||||
_.RC:$src1)),
|
||||
(!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
|
||||
_.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
|
||||
|
||||
// Additional patterns for matching loads in other positions.
|
||||
def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
|
||||
_.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
|
||||
_.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
|
||||
(!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
|
||||
addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
|
||||
def : Pat<(_.VT (OpNode _.RC:$src1,
|
||||
(bitconvert (_.LdFrag addr:$src3)),
|
||||
_.RC:$src2, (i8 timm:$src4))),
|
||||
_.RC:$src2, (i8 imm:$src4))),
|
||||
(!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
|
||||
addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
|
||||
|
||||
|
@ -11293,13 +11293,13 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
// positions.
|
||||
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(OpNode (bitconvert (_.LdFrag addr:$src3)),
|
||||
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
|
||||
_.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
|
||||
_.ImmAllZerosV)),
|
||||
(!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
|
||||
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
|
||||
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
|
||||
_.RC:$src2, (i8 timm:$src4)),
|
||||
_.RC:$src2, (i8 imm:$src4)),
|
||||
_.ImmAllZerosV)),
|
||||
(!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
|
||||
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
|
||||
|
@ -11308,43 +11308,43 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
// operand orders.
|
||||
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
|
||||
_.RC:$src2, (i8 timm:$src4)),
|
||||
_.RC:$src2, (i8 imm:$src4)),
|
||||
_.RC:$src1)),
|
||||
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
|
||||
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
|
||||
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(OpNode (bitconvert (_.LdFrag addr:$src3)),
|
||||
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
|
||||
_.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
|
||||
_.RC:$src1)),
|
||||
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
|
||||
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
|
||||
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(OpNode _.RC:$src2, _.RC:$src1,
|
||||
(bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
|
||||
(bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
|
||||
_.RC:$src1)),
|
||||
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
|
||||
_.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
|
||||
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
|
||||
_.RC:$src1, (i8 timm:$src4)),
|
||||
_.RC:$src1, (i8 imm:$src4)),
|
||||
_.RC:$src1)),
|
||||
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
|
||||
_.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
|
||||
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(OpNode (bitconvert (_.LdFrag addr:$src3)),
|
||||
_.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
|
||||
_.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
|
||||
_.RC:$src1)),
|
||||
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
|
||||
_.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
|
||||
|
||||
// Additional patterns for matching broadcasts in other positions.
|
||||
def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
|
||||
_.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
|
||||
_.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
|
||||
(!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
|
||||
addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
|
||||
def : Pat<(_.VT (OpNode _.RC:$src1,
|
||||
(X86VBroadcast (_.ScalarLdFrag addr:$src3)),
|
||||
_.RC:$src2, (i8 timm:$src4))),
|
||||
_.RC:$src2, (i8 imm:$src4))),
|
||||
(!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
|
||||
addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
|
||||
|
||||
|
@ -11352,7 +11352,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
// positions.
|
||||
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
|
||||
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
|
||||
_.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
|
||||
_.ImmAllZerosV)),
|
||||
(!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
|
||||
_.KRCWM:$mask, _.RC:$src2, addr:$src3,
|
||||
|
@ -11360,7 +11360,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(OpNode _.RC:$src1,
|
||||
(X86VBroadcast (_.ScalarLdFrag addr:$src3)),
|
||||
_.RC:$src2, (i8 timm:$src4)),
|
||||
_.RC:$src2, (i8 imm:$src4)),
|
||||
_.ImmAllZerosV)),
|
||||
(!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
|
||||
_.KRCWM:$mask, _.RC:$src2, addr:$src3,
|
||||
|
@ -11371,32 +11371,32 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(OpNode _.RC:$src1,
|
||||
(X86VBroadcast (_.ScalarLdFrag addr:$src3)),
|
||||
_.RC:$src2, (i8 timm:$src4)),
|
||||
_.RC:$src2, (i8 imm:$src4)),
|
||||
_.RC:$src1)),
|
||||
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
|
||||
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
|
||||
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
|
||||
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
|
||||
_.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
|
||||
_.RC:$src1)),
|
||||
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
|
||||
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
|
||||
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(OpNode _.RC:$src2, _.RC:$src1,
|
||||
(X86VBroadcast (_.ScalarLdFrag addr:$src3)),
|
||||
(i8 timm:$src4)), _.RC:$src1)),
|
||||
(i8 imm:$src4)), _.RC:$src1)),
|
||||
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
|
||||
_.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
|
||||
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(OpNode _.RC:$src2,
|
||||
(X86VBroadcast (_.ScalarLdFrag addr:$src3)),
|
||||
_.RC:$src1, (i8 timm:$src4)),
|
||||
_.RC:$src1, (i8 imm:$src4)),
|
||||
_.RC:$src1)),
|
||||
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
|
||||
_.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
|
||||
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
|
||||
_.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
|
||||
_.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
|
||||
_.RC:$src1)),
|
||||
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
|
||||
_.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
|
||||
|
@ -11531,14 +11531,14 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
|
|||
(X86VFixupimm (_.VT _.RC:$src1),
|
||||
(_.VT _.RC:$src2),
|
||||
(TblVT.VT _.RC:$src3),
|
||||
(i32 timm:$src4))>, Sched<[sched]>;
|
||||
(i32 imm:$src4))>, Sched<[sched]>;
|
||||
defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
|
||||
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
|
||||
(X86VFixupimm (_.VT _.RC:$src1),
|
||||
(_.VT _.RC:$src2),
|
||||
(TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
|
||||
(i32 timm:$src4))>,
|
||||
(i32 imm:$src4))>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
|
||||
|
@ -11547,7 +11547,7 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
|
|||
(X86VFixupimm (_.VT _.RC:$src1),
|
||||
(_.VT _.RC:$src2),
|
||||
(TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
|
||||
(i32 timm:$src4))>,
|
||||
(i32 imm:$src4))>,
|
||||
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
} // Constraints = "$src1 = $dst"
|
||||
}
|
||||
|
@ -11564,7 +11564,7 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
|
|||
(X86VFixupimmSAE (_.VT _.RC:$src1),
|
||||
(_.VT _.RC:$src2),
|
||||
(TblVT.VT _.RC:$src3),
|
||||
(i32 timm:$src4))>,
|
||||
(i32 imm:$src4))>,
|
||||
EVEX_B, Sched<[sched]>;
|
||||
}
|
||||
}
|
||||
|
@ -11580,7 +11580,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
|
|||
(X86VFixupimms (_.VT _.RC:$src1),
|
||||
(_.VT _.RC:$src2),
|
||||
(_src3VT.VT _src3VT.RC:$src3),
|
||||
(i32 timm:$src4))>, Sched<[sched]>;
|
||||
(i32 imm:$src4))>, Sched<[sched]>;
|
||||
defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
|
||||
OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
|
||||
|
@ -11588,7 +11588,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
|
|||
(X86VFixupimmSAEs (_.VT _.RC:$src1),
|
||||
(_.VT _.RC:$src2),
|
||||
(_src3VT.VT _src3VT.RC:$src3),
|
||||
(i32 timm:$src4))>,
|
||||
(i32 imm:$src4))>,
|
||||
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
|
||||
|
@ -11597,13 +11597,13 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
|
|||
(_.VT _.RC:$src2),
|
||||
(_src3VT.VT (scalar_to_vector
|
||||
(_src3VT.ScalarLdFrag addr:$src3))),
|
||||
(i32 timm:$src4))>,
|
||||
(i32 imm:$src4))>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
|
||||
AVX512VLVectorVTInfo _Vec,
|
||||
AVX512VLVectorVTInfo _Vec,
|
||||
AVX512VLVectorVTInfo _Tbl> {
|
||||
let Predicates = [HasAVX512] in
|
||||
defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
|
||||
|
@ -12072,7 +12072,7 @@ multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
|
|||
"$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
|
||||
(OpNode (VTI.VT VTI.RC:$src1),
|
||||
(bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
|
||||
(i8 timm:$src3))>, EVEX_B,
|
||||
(i8 imm:$src3))>, EVEX_B,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
|
||||
|
|
|
@ -114,13 +114,13 @@ multiclass ssse3_palign_mm<string asm, Intrinsic IntId,
|
|||
def rri : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
|
||||
(ins VR64:$src1, VR64:$src2, u8imm:$src3),
|
||||
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 timm:$src3)))]>,
|
||||
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>,
|
||||
Sched<[sched]>;
|
||||
def rmi : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
|
||||
(ins VR64:$src1, i64mem:$src2, u8imm:$src3),
|
||||
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR64:$dst, (IntId VR64:$src1,
|
||||
(bitconvert (load_mmx addr:$src2)), (i8 timm:$src3)))]>,
|
||||
(bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
|
||||
|
@ -496,14 +496,14 @@ def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
|
|||
(outs VR64:$dst), (ins VR64:$src1, u8imm:$src2),
|
||||
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR64:$dst,
|
||||
(int_x86_sse_pshuf_w VR64:$src1, timm:$src2))]>,
|
||||
(int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>,
|
||||
Sched<[SchedWriteShuffle.MMX]>;
|
||||
def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
|
||||
(outs VR64:$dst), (ins i64mem:$src1, u8imm:$src2),
|
||||
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR64:$dst,
|
||||
(int_x86_sse_pshuf_w (load_mmx addr:$src1),
|
||||
timm:$src2))]>,
|
||||
imm:$src2))]>,
|
||||
Sched<[SchedWriteShuffle.MMX.Folded]>;
|
||||
|
||||
// -- Conversion Instructions
|
||||
|
|
|
@ -370,7 +370,7 @@ defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd
|
|||
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups",
|
||||
SSEPackedSingle, SchedWriteFMoveLS.YMM>,
|
||||
PS, VEX, VEX_L, VEX_WIG;
|
||||
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd",
|
||||
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd",
|
||||
SSEPackedDouble, SchedWriteFMoveLS.YMM>,
|
||||
PD, VEX, VEX_L, VEX_WIG;
|
||||
}
|
||||
|
@ -1728,12 +1728,12 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
|
|||
let isCommutable = 1 in
|
||||
def rr : SIi8<0xC2, MRMSrcReg,
|
||||
(outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
|
||||
[(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, timm:$cc))]>,
|
||||
[(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))]>,
|
||||
Sched<[sched]>;
|
||||
def rm : SIi8<0xC2, MRMSrcMem,
|
||||
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
|
||||
[(set RC:$dst, (OpNode (VT RC:$src1),
|
||||
(ld_frag addr:$src2), timm:$cc))]>,
|
||||
(ld_frag addr:$src2), imm:$cc))]>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
|
||||
|
@ -1766,13 +1766,13 @@ multiclass sse12_cmp_scalar_int<Operand memop,
|
|||
def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src, u8imm:$cc), asm,
|
||||
[(set VR128:$dst, (Int VR128:$src1,
|
||||
VR128:$src, timm:$cc))]>,
|
||||
VR128:$src, imm:$cc))]>,
|
||||
Sched<[sched]>;
|
||||
let mayLoad = 1 in
|
||||
def rm_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, memop:$src, u8imm:$cc), asm,
|
||||
[(set VR128:$dst, (Int VR128:$src1,
|
||||
mem_cpat:$src, timm:$cc))]>,
|
||||
mem_cpat:$src, imm:$cc))]>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
|
||||
|
@ -1891,12 +1891,12 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
|
|||
let isCommutable = 1 in
|
||||
def rri : PIi8<0xC2, MRMSrcReg,
|
||||
(outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
|
||||
[(set RC:$dst, (VT (X86cmpp RC:$src1, RC:$src2, timm:$cc)))], d>,
|
||||
[(set RC:$dst, (VT (X86cmpp RC:$src1, RC:$src2, imm:$cc)))], d>,
|
||||
Sched<[sched]>;
|
||||
def rmi : PIi8<0xC2, MRMSrcMem,
|
||||
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
|
||||
[(set RC:$dst,
|
||||
(VT (X86cmpp RC:$src1, (ld_frag addr:$src2), timm:$cc)))], d>,
|
||||
(VT (X86cmpp RC:$src1, (ld_frag addr:$src2), imm:$cc)))], d>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
|
||||
|
@ -1921,7 +1921,7 @@ let Constraints = "$src1 = $dst" in {
|
|||
SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD;
|
||||
}
|
||||
|
||||
def CommutableCMPCC : PatLeaf<(timm), [{
|
||||
def CommutableCMPCC : PatLeaf<(imm), [{
|
||||
uint64_t Imm = N->getZExtValue() & 0x7;
|
||||
return (Imm == 0x00 || Imm == 0x03 || Imm == 0x04 || Imm == 0x07);
|
||||
}]>;
|
||||
|
@ -1985,13 +1985,13 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
|
|||
def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
|
||||
[(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
|
||||
(i8 timm:$src3))))], d>,
|
||||
(i8 imm:$src3))))], d>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
let isCommutable = IsCommutable in
|
||||
def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
|
||||
(ins RC:$src1, RC:$src2, u8imm:$src3), asm,
|
||||
[(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
|
||||
(i8 timm:$src3))))], d>,
|
||||
(i8 imm:$src3))))], d>,
|
||||
Sched<[sched]>;
|
||||
}
|
||||
|
||||
|
@ -2736,7 +2736,7 @@ defm : scalar_math_patterns<fadd, "ADDSD", X86Movsd, v2f64, f64, FR64, loadf64,
|
|||
defm : scalar_math_patterns<fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
|
||||
defm : scalar_math_patterns<fmul, "MULSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
|
||||
defm : scalar_math_patterns<fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
|
||||
|
||||
|
||||
/// Unop Arithmetic
|
||||
/// In addition, we also have a special variant of the scalar form here to
|
||||
/// represent the associated intrinsic operation. This form is unlike the
|
||||
|
@ -3497,7 +3497,7 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
|
|||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 timm:$src2))))]>,
|
||||
[(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 imm:$src2))))]>,
|
||||
Sched<[schedImm]>;
|
||||
}
|
||||
|
||||
|
@ -3529,7 +3529,7 @@ multiclass PDI_binop_ri<bits<8> opc, Format ImmForm, string OpcodeStr,
|
|||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set RC:$dst, (VT (OpNode RC:$src1, (i8 timm:$src2))))]>,
|
||||
[(set RC:$dst, (VT (OpNode RC:$src1, (i8 imm:$src2))))]>,
|
||||
Sched<[sched]>;
|
||||
}
|
||||
|
||||
|
@ -3612,7 +3612,7 @@ let Predicates = [HasAVX, prd] in {
|
|||
!strconcat("v", OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set VR128:$dst,
|
||||
(vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>,
|
||||
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))]>,
|
||||
VEX, Sched<[sched.XMM]>, VEX_WIG;
|
||||
def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins i128mem:$src1, u8imm:$src2),
|
||||
|
@ -3620,7 +3620,7 @@ let Predicates = [HasAVX, prd] in {
|
|||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set VR128:$dst,
|
||||
(vt128 (OpNode (load addr:$src1),
|
||||
(i8 timm:$src2))))]>, VEX,
|
||||
(i8 imm:$src2))))]>, VEX,
|
||||
Sched<[sched.XMM.Folded]>, VEX_WIG;
|
||||
}
|
||||
|
||||
|
@ -3630,7 +3630,7 @@ let Predicates = [HasAVX2, prd] in {
|
|||
!strconcat("v", OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set VR256:$dst,
|
||||
(vt256 (OpNode VR256:$src1, (i8 timm:$src2))))]>,
|
||||
(vt256 (OpNode VR256:$src1, (i8 imm:$src2))))]>,
|
||||
VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
|
||||
def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
|
||||
(ins i256mem:$src1, u8imm:$src2),
|
||||
|
@ -3638,7 +3638,7 @@ let Predicates = [HasAVX2, prd] in {
|
|||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set VR256:$dst,
|
||||
(vt256 (OpNode (load addr:$src1),
|
||||
(i8 timm:$src2))))]>, VEX, VEX_L,
|
||||
(i8 imm:$src2))))]>, VEX, VEX_L,
|
||||
Sched<[sched.YMM.Folded]>, VEX_WIG;
|
||||
}
|
||||
|
||||
|
@ -3648,7 +3648,7 @@ let Predicates = [UseSSE2] in {
|
|||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set VR128:$dst,
|
||||
(vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>,
|
||||
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))]>,
|
||||
Sched<[sched.XMM]>;
|
||||
def mi : Ii8<0x70, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2),
|
||||
|
@ -3656,7 +3656,7 @@ let Predicates = [UseSSE2] in {
|
|||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set VR128:$dst,
|
||||
(vt128 (OpNode (memop addr:$src1),
|
||||
(i8 timm:$src2))))]>,
|
||||
(i8 imm:$src2))))]>,
|
||||
Sched<[sched.XMM.Folded]>;
|
||||
}
|
||||
}
|
||||
|
@ -4827,7 +4827,7 @@ multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
|
|||
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
!strconcat(asm,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set RC:$dst, (VT (X86PAlignr RC:$src1, RC:$src2, (i8 timm:$src3))))]>,
|
||||
[(set RC:$dst, (VT (X86PAlignr RC:$src1, RC:$src2, (i8 imm:$src3))))]>,
|
||||
Sched<[sched]>;
|
||||
let mayLoad = 1 in
|
||||
def rmi : SS3AI<0x0F, MRMSrcMem, (outs RC:$dst),
|
||||
|
@ -4838,7 +4838,7 @@ multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
|
|||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set RC:$dst, (VT (X86PAlignr RC:$src1,
|
||||
(memop_frag addr:$src2),
|
||||
(i8 timm:$src3))))]>,
|
||||
(i8 imm:$src3))))]>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
}
|
||||
|
@ -5315,7 +5315,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
|
|||
!strconcat(asm,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set VR128:$dst,
|
||||
(X86insertps VR128:$src1, VR128:$src2, timm:$src3))]>,
|
||||
(X86insertps VR128:$src1, VR128:$src2, imm:$src3))]>,
|
||||
Sched<[SchedWriteFShuffle.XMM]>;
|
||||
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, f32mem:$src2, u8imm:$src3),
|
||||
|
@ -5326,7 +5326,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
|
|||
[(set VR128:$dst,
|
||||
(X86insertps VR128:$src1,
|
||||
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
|
||||
timm:$src3))]>,
|
||||
imm:$src3))]>,
|
||||
Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
|
||||
}
|
||||
|
||||
|
@ -5352,7 +5352,7 @@ multiclass sse41_fp_unop_p<bits<8> opc, string OpcodeStr,
|
|||
(outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set RC:$dst, (VT (OpNode RC:$src1, timm:$src2)))]>,
|
||||
[(set RC:$dst, (VT (OpNode RC:$src1, imm:$src2)))]>,
|
||||
Sched<[sched]>;
|
||||
|
||||
// Vector intrinsic operation, mem
|
||||
|
@ -5361,7 +5361,7 @@ multiclass sse41_fp_unop_p<bits<8> opc, string OpcodeStr,
|
|||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set RC:$dst,
|
||||
(VT (OpNode (mem_frag addr:$src1), timm:$src2)))]>,
|
||||
(VT (OpNode (mem_frag addr:$src1),imm:$src2)))]>,
|
||||
Sched<[sched.Folded]>;
|
||||
}
|
||||
|
||||
|
@ -5443,7 +5443,7 @@ let ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 in {
|
|||
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
!strconcat(OpcodeStr,
|
||||
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set VR128:$dst, (VT32 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>,
|
||||
[(set VR128:$dst, (VT32 (OpNode VR128:$src1, VR128:$src2, imm:$src3)))]>,
|
||||
Sched<[sched]>;
|
||||
|
||||
def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
|
||||
|
@ -5454,7 +5454,7 @@ let ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 in {
|
|||
!strconcat(OpcodeStr,
|
||||
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set VR128:$dst,
|
||||
(OpNode VR128:$src1, sse_load_f32:$src2, timm:$src3))]>,
|
||||
(OpNode VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
} // ExeDomain = SSEPackedSingle, isCodeGenOnly = 1
|
||||
|
||||
|
@ -5466,7 +5466,7 @@ let ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in {
|
|||
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
!strconcat(OpcodeStr,
|
||||
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set VR128:$dst, (VT64 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>,
|
||||
[(set VR128:$dst, (VT64 (OpNode VR128:$src1, VR128:$src2, imm:$src3)))]>,
|
||||
Sched<[sched]>;
|
||||
|
||||
def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
|
||||
|
@ -5477,7 +5477,7 @@ let ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in {
|
|||
!strconcat(OpcodeStr,
|
||||
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set VR128:$dst,
|
||||
(OpNode VR128:$src1, sse_load_f64:$src2, timm:$src3))]>,
|
||||
(OpNode VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
} // ExeDomain = SSEPackedDouble, isCodeGenOnly = 1
|
||||
}
|
||||
|
@ -5512,16 +5512,16 @@ let Predicates = [UseAVX] in {
|
|||
}
|
||||
|
||||
let Predicates = [UseAVX] in {
|
||||
def : Pat<(X86VRndScale FR32:$src1, timm:$src2),
|
||||
def : Pat<(X86VRndScale FR32:$src1, imm:$src2),
|
||||
(VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, imm:$src2)>;
|
||||
def : Pat<(X86VRndScale FR64:$src1, timm:$src2),
|
||||
def : Pat<(X86VRndScale FR64:$src1, imm:$src2),
|
||||
(VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, imm:$src2)>;
|
||||
}
|
||||
|
||||
let Predicates = [UseAVX, OptForSize] in {
|
||||
def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2),
|
||||
def : Pat<(X86VRndScale (loadf32 addr:$src1), imm:$src2),
|
||||
(VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>;
|
||||
def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2),
|
||||
def : Pat<(X86VRndScale (loadf64 addr:$src1), imm:$src2),
|
||||
(VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>;
|
||||
}
|
||||
|
||||
|
@ -5539,16 +5539,16 @@ defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl,
|
|||
v4f32, v2f64, X86RndScales>;
|
||||
|
||||
let Predicates = [UseSSE41] in {
|
||||
def : Pat<(X86VRndScale FR32:$src1, timm:$src2),
|
||||
def : Pat<(X86VRndScale FR32:$src1, imm:$src2),
|
||||
(ROUNDSSr FR32:$src1, imm:$src2)>;
|
||||
def : Pat<(X86VRndScale FR64:$src1, timm:$src2),
|
||||
def : Pat<(X86VRndScale FR64:$src1, imm:$src2),
|
||||
(ROUNDSDr FR64:$src1, imm:$src2)>;
|
||||
}
|
||||
|
||||
let Predicates = [UseSSE41, OptForSize] in {
|
||||
def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2),
|
||||
def : Pat<(X86VRndScale (loadf32 addr:$src1), imm:$src2),
|
||||
(ROUNDSSm addr:$src1, imm:$src2)>;
|
||||
def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2),
|
||||
def : Pat<(X86VRndScale (loadf64 addr:$src1), imm:$src2),
|
||||
(ROUNDSDm addr:$src1, imm:$src2)>;
|
||||
}
|
||||
|
||||
|
@ -5830,7 +5830,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
|
|||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set RC:$dst, (IntId RC:$src1, RC:$src2, timm:$src3))]>,
|
||||
[(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
|
||||
Sched<[sched]>;
|
||||
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, x86memop:$src2, u8imm:$src3),
|
||||
|
@ -5840,7 +5840,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
|
|||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set RC:$dst,
|
||||
(IntId RC:$src1, (memop_frag addr:$src2), timm:$src3))]>,
|
||||
(IntId RC:$src1, (memop_frag addr:$src2), imm:$src3))]>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
|
||||
|
@ -5857,7 +5857,7 @@ multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
|
||||
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>,
|
||||
Sched<[sched]>;
|
||||
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, x86memop:$src2, u8imm:$src3),
|
||||
|
@ -5867,7 +5867,7 @@ multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set RC:$dst,
|
||||
(OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>,
|
||||
(OpVT (OpNode RC:$src1, (memop_frag addr:$src2), imm:$src3)))]>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
|
||||
|
@ -6012,7 +6012,7 @@ let ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in {
|
|||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
|
||||
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>,
|
||||
Sched<[sched]>;
|
||||
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, x86memop:$src2, u8imm:$src3),
|
||||
|
@ -6022,12 +6022,12 @@ let ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in {
|
|||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set RC:$dst,
|
||||
(OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>,
|
||||
(OpVT (OpNode RC:$src1, (memop_frag addr:$src2), imm:$src3)))]>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
|
||||
// Pattern to commute if load is in first source.
|
||||
def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, timm:$src3)),
|
||||
def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, imm:$src3)),
|
||||
(!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
|
||||
(commuteXForm imm:$src3))>;
|
||||
}
|
||||
|
@ -6065,36 +6065,36 @@ let Predicates = [HasAVX2] in {
|
|||
// Emulate vXi32/vXi64 blends with vXf32/vXf64 or pblendw.
|
||||
// ExecutionDomainFixPass will cleanup domains later on.
|
||||
let Predicates = [HasAVX1Only] in {
|
||||
def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3),
|
||||
def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3),
|
||||
(VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$src3)>;
|
||||
def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3),
|
||||
def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3),
|
||||
(VBLENDPDYrmi VR256:$src1, addr:$src2, imm:$src3)>;
|
||||
def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3),
|
||||
def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3),
|
||||
(VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 imm:$src3))>;
|
||||
|
||||
// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
|
||||
// it from becoming movsd via commuting under optsize.
|
||||
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
|
||||
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
|
||||
(VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>;
|
||||
def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3),
|
||||
def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3),
|
||||
(VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>;
|
||||
def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3),
|
||||
def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3),
|
||||
(VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>;
|
||||
|
||||
def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), timm:$src3),
|
||||
def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), imm:$src3),
|
||||
(VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$src3)>;
|
||||
def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), timm:$src3),
|
||||
def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), imm:$src3),
|
||||
(VBLENDPSYrmi VR256:$src1, addr:$src2, imm:$src3)>;
|
||||
def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, timm:$src3),
|
||||
def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, imm:$src3),
|
||||
(VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 imm:$src3))>;
|
||||
|
||||
// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
|
||||
// it from becoming movss via commuting under optsize.
|
||||
def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3),
|
||||
def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
|
||||
(VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>;
|
||||
def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), timm:$src3),
|
||||
def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), imm:$src3),
|
||||
(VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
|
||||
def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, timm:$src3),
|
||||
def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, imm:$src3),
|
||||
(VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;
|
||||
}
|
||||
|
||||
|
@ -6111,18 +6111,18 @@ defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16,
|
|||
let Predicates = [UseSSE41] in {
|
||||
// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
|
||||
// it from becoming movss via commuting under optsize.
|
||||
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
|
||||
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
|
||||
(PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>;
|
||||
def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), timm:$src3),
|
||||
def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), imm:$src3),
|
||||
(PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>;
|
||||
def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, timm:$src3),
|
||||
def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, imm:$src3),
|
||||
(PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>;
|
||||
|
||||
def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3),
|
||||
def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
|
||||
(PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>;
|
||||
def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), timm:$src3),
|
||||
def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), imm:$src3),
|
||||
(PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
|
||||
def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, timm:$src3),
|
||||
def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, imm:$src3),
|
||||
(PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;
|
||||
}
|
||||
|
||||
|
@ -6596,7 +6596,7 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
|
|||
"sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
|
||||
(i8 timm:$src3)))]>, TA,
|
||||
(i8 imm:$src3)))]>, TA,
|
||||
Sched<[SchedWriteVecIMul.XMM]>;
|
||||
def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
|
||||
|
@ -6604,7 +6604,7 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
|
|||
[(set VR128:$dst,
|
||||
(int_x86_sha1rnds4 VR128:$src1,
|
||||
(memop addr:$src2),
|
||||
(i8 timm:$src3)))]>, TA,
|
||||
(i8 imm:$src3)))]>, TA,
|
||||
Sched<[SchedWriteVecIMul.XMM.Folded,
|
||||
SchedWriteVecIMul.XMM.ReadAfterFold]>;
|
||||
|
||||
|
@ -6722,26 +6722,26 @@ let Predicates = [HasAVX, HasAES] in {
|
|||
(ins VR128:$src1, u8imm:$src2),
|
||||
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>,
|
||||
(int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
|
||||
Sched<[WriteAESKeyGen]>, VEX, VEX_WIG;
|
||||
def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins i128mem:$src1, u8imm:$src2),
|
||||
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_aesni_aeskeygenassist (load addr:$src1), timm:$src2))]>,
|
||||
(int_x86_aesni_aeskeygenassist (load addr:$src1), imm:$src2))]>,
|
||||
Sched<[WriteAESKeyGen.Folded]>, VEX, VEX_WIG;
|
||||
}
|
||||
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, u8imm:$src2),
|
||||
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>,
|
||||
(int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
|
||||
Sched<[WriteAESKeyGen]>;
|
||||
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins i128mem:$src1, u8imm:$src2),
|
||||
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_aesni_aeskeygenassist (memop addr:$src1), timm:$src2))]>,
|
||||
(int_x86_aesni_aeskeygenassist (memop addr:$src1), imm:$src2))]>,
|
||||
Sched<[WriteAESKeyGen.Folded]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -6762,7 +6762,7 @@ let Predicates = [NoAVX, HasPCLMUL] in {
|
|||
(ins VR128:$src1, VR128:$src2, u8imm:$src3),
|
||||
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_pclmulqdq VR128:$src1, VR128:$src2, timm:$src3))]>,
|
||||
(int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>,
|
||||
Sched<[WriteCLMul]>;
|
||||
|
||||
def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
|
||||
|
@ -6770,12 +6770,12 @@ let Predicates = [NoAVX, HasPCLMUL] in {
|
|||
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_pclmulqdq VR128:$src1, (memop addr:$src2),
|
||||
timm:$src3))]>,
|
||||
imm:$src3))]>,
|
||||
Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
|
||||
} // Constraints = "$src1 = $dst"
|
||||
|
||||
def : Pat<(int_x86_pclmulqdq (memop addr:$src2), VR128:$src1,
|
||||
(i8 timm:$src3)),
|
||||
(i8 imm:$src3)),
|
||||
(PCLMULQDQrm VR128:$src1, addr:$src2,
|
||||
(PCLMULCommuteImm imm:$src3))>;
|
||||
} // Predicates = [NoAVX, HasPCLMUL]
|
||||
|
@ -6799,19 +6799,19 @@ multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp,
|
|||
(ins RC:$src1, RC:$src2, u8imm:$src3),
|
||||
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||
[(set RC:$dst,
|
||||
(IntId RC:$src1, RC:$src2, timm:$src3))]>,
|
||||
(IntId RC:$src1, RC:$src2, imm:$src3))]>,
|
||||
Sched<[WriteCLMul]>;
|
||||
|
||||
def rm : PCLMULIi8<0x44, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, MemOp:$src2, u8imm:$src3),
|
||||
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||
[(set RC:$dst,
|
||||
(IntId RC:$src1, (LdFrag addr:$src2), timm:$src3))]>,
|
||||
(IntId RC:$src1, (LdFrag addr:$src2), imm:$src3))]>,
|
||||
Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
|
||||
|
||||
// We can commute a load in the first operand by swapping the sources and
|
||||
// rotating the immediate.
|
||||
def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 timm:$src3)),
|
||||
def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 imm:$src3)),
|
||||
(!cast<Instruction>(NAME#"rm") RC:$src1, addr:$src2,
|
||||
(PCLMULCommuteImm imm:$src3))>;
|
||||
}
|
||||
|
@ -6857,8 +6857,8 @@ let Constraints = "$src = $dst" in {
|
|||
def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst),
|
||||
(ins VR128:$src, u8imm:$len, u8imm:$idx),
|
||||
"extrq\t{$idx, $len, $src|$src, $len, $idx}",
|
||||
[(set VR128:$dst, (X86extrqi VR128:$src, timm:$len,
|
||||
timm:$idx))]>,
|
||||
[(set VR128:$dst, (X86extrqi VR128:$src, imm:$len,
|
||||
imm:$idx))]>,
|
||||
PD, Sched<[SchedWriteVecALU.XMM]>;
|
||||
def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src, VR128:$mask),
|
||||
|
@ -6871,7 +6871,7 @@ def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst),
|
|||
(ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx),
|
||||
"insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
|
||||
[(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2,
|
||||
timm:$len, timm:$idx))]>,
|
||||
imm:$len, imm:$idx))]>,
|
||||
XD, Sched<[SchedWriteVecALU.XMM]>;
|
||||
def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src, VR128:$mask),
|
||||
|
@ -7142,13 +7142,13 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
|
|||
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
|
||||
(ins RC:$src1, u8imm:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set RC:$dst, (f_vt (X86VPermilpi RC:$src1, (i8 timm:$src2))))]>, VEX,
|
||||
[(set RC:$dst, (f_vt (X86VPermilpi RC:$src1, (i8 imm:$src2))))]>, VEX,
|
||||
Sched<[sched]>;
|
||||
def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
|
||||
(ins x86memop_f:$src1, u8imm:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set RC:$dst,
|
||||
(f_vt (X86VPermilpi (load addr:$src1), (i8 timm:$src2))))]>, VEX,
|
||||
(f_vt (X86VPermilpi (load addr:$src1), (i8 imm:$src2))))]>, VEX,
|
||||
Sched<[sched.Folded]>;
|
||||
}// Predicates = [HasAVX, NoVLX]
|
||||
}
|
||||
|
@ -7180,13 +7180,13 @@ def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
|
|||
(ins VR256:$src1, VR256:$src2, u8imm:$src3),
|
||||
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||
[(set VR256:$dst, (v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
|
||||
(i8 timm:$src3))))]>, VEX_4V, VEX_L,
|
||||
(i8 imm:$src3))))]>, VEX_4V, VEX_L,
|
||||
Sched<[WriteFShuffle256]>;
|
||||
def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
|
||||
(ins VR256:$src1, f256mem:$src2, u8imm:$src3),
|
||||
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||
[(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4f64 addr:$src2),
|
||||
(i8 timm:$src3)))]>, VEX_4V, VEX_L,
|
||||
(i8 imm:$src3)))]>, VEX_4V, VEX_L,
|
||||
Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
|
||||
}
|
||||
|
||||
|
@ -7198,19 +7198,19 @@ def Perm2XCommuteImm : SDNodeXForm<imm, [{
|
|||
let Predicates = [HasAVX] in {
|
||||
// Pattern with load in other operand.
|
||||
def : Pat<(v4f64 (X86VPerm2x128 (loadv4f64 addr:$src2),
|
||||
VR256:$src1, (i8 timm:$imm))),
|
||||
VR256:$src1, (i8 imm:$imm))),
|
||||
(VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX1Only] in {
|
||||
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))),
|
||||
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
|
||||
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
|
||||
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
|
||||
(loadv4i64 addr:$src2), (i8 timm:$imm))),
|
||||
(loadv4i64 addr:$src2), (i8 imm:$imm))),
|
||||
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
|
||||
// Pattern with load in other operand.
|
||||
def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
|
||||
VR256:$src1, (i8 timm:$imm))),
|
||||
VR256:$src1, (i8 imm:$imm))),
|
||||
(VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>;
|
||||
}
|
||||
|
||||
|
@ -7256,7 +7256,7 @@ multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
|
|||
def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
|
||||
(ins RC:$src1, i32u8imm:$src2),
|
||||
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR128:$dst, (X86cvtps2ph RC:$src1, timm:$src2))]>,
|
||||
[(set VR128:$dst, (X86cvtps2ph RC:$src1, imm:$src2))]>,
|
||||
TAPD, VEX, Sched<[RR]>;
|
||||
let hasSideEffects = 0, mayStore = 1 in
|
||||
def mr : Ii8<0x1D, MRMDestMem, (outs),
|
||||
|
@ -7326,18 +7326,18 @@ multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
(ins RC:$src1, RC:$src2, u8imm:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
|
||||
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>,
|
||||
Sched<[sched]>, VEX_4V;
|
||||
def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, x86memop:$src2, u8imm:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||
[(set RC:$dst,
|
||||
(OpVT (OpNode RC:$src1, (load addr:$src2), timm:$src3)))]>,
|
||||
(OpVT (OpNode RC:$src1, (load addr:$src2), imm:$src3)))]>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V;
|
||||
|
||||
// Pattern to commute if load is in first source.
|
||||
def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, timm:$src3)),
|
||||
def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, imm:$src3)),
|
||||
(!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
|
||||
(commuteXForm imm:$src3))>;
|
||||
}
|
||||
|
@ -7350,18 +7350,18 @@ defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32,
|
|||
SchedWriteBlend.YMM, VR256, i256mem,
|
||||
BlendCommuteImm8>, VEX_L;
|
||||
|
||||
def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3),
|
||||
def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3),
|
||||
(VPBLENDDYrri VR256:$src1, VR256:$src2, (BlendScaleImm4 imm:$src3))>;
|
||||
def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3),
|
||||
def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3),
|
||||
(VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
|
||||
def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3),
|
||||
def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3),
|
||||
(VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;
|
||||
|
||||
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
|
||||
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
|
||||
(VPBLENDDrri VR128:$src1, VR128:$src2, (BlendScaleImm2to4 imm:$src3))>;
|
||||
def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3),
|
||||
def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3),
|
||||
(VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleImm2to4 imm:$src3))>;
|
||||
def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3),
|
||||
def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3),
|
||||
(VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2to4 imm:$src3))>;
|
||||
}
|
||||
|
||||
|
@ -7611,7 +7611,7 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
|
|||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set VR256:$dst,
|
||||
(OpVT (X86VPermi VR256:$src1, (i8 timm:$src2))))]>,
|
||||
(OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>,
|
||||
Sched<[Sched]>, VEX, VEX_L;
|
||||
def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
|
||||
(ins memOp:$src1, u8imm:$src2),
|
||||
|
@ -7619,7 +7619,7 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
|
|||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set VR256:$dst,
|
||||
(OpVT (X86VPermi (mem_frag addr:$src1),
|
||||
(i8 timm:$src2))))]>,
|
||||
(i8 imm:$src2))))]>,
|
||||
Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VEX_L;
|
||||
}
|
||||
}
|
||||
|
@ -7638,18 +7638,18 @@ def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
|
|||
(ins VR256:$src1, VR256:$src2, u8imm:$src3),
|
||||
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||
[(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
|
||||
(i8 timm:$src3))))]>, Sched<[WriteShuffle256]>,
|
||||
(i8 imm:$src3))))]>, Sched<[WriteShuffle256]>,
|
||||
VEX_4V, VEX_L;
|
||||
def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
|
||||
(ins VR256:$src1, f256mem:$src2, u8imm:$src3),
|
||||
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||
[(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2),
|
||||
(i8 timm:$src3)))]>,
|
||||
(i8 imm:$src3)))]>,
|
||||
Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
|
||||
|
||||
let Predicates = [HasAVX2] in
|
||||
def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
|
||||
VR256:$src1, (i8 timm:$imm))),
|
||||
VR256:$src1, (i8 imm:$imm))),
|
||||
(VPERM2I128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>;
|
||||
|
||||
|
||||
|
@ -7931,13 +7931,13 @@ multiclass GF2P8AFFINE_rmi<bits<8> Op, string OpStr, ValueType OpVT,
|
|||
OpStr##"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}") in {
|
||||
def rri : Ii8<Op, MRMSrcReg, (outs RC:$dst),
|
||||
(ins RC:$src1, RC:$src2, u8imm:$src3), "",
|
||||
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))],
|
||||
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))],
|
||||
SSEPackedInt>, Sched<[SchedWriteVecALU.XMM]>;
|
||||
def rmi : Ii8<Op, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, X86MemOp:$src2, u8imm:$src3), "",
|
||||
[(set RC:$dst, (OpVT (OpNode RC:$src1,
|
||||
(MemOpFrag addr:$src2),
|
||||
timm:$src3)))], SSEPackedInt>,
|
||||
imm:$src3)))], SSEPackedInt>,
|
||||
Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -43,7 +43,7 @@ def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", [(int_x86_int (i8 3))]>;
|
|||
let SchedRW = [WriteSystem] in {
|
||||
|
||||
def INT : Ii8<0xcd, RawFrm, (outs), (ins u8imm:$trap), "int\t$trap",
|
||||
[(int_x86_int timm:$trap)]>;
|
||||
[(int_x86_int imm:$trap)]>;
|
||||
|
||||
|
||||
def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
|
||||
|
|
|
@ -45,7 +45,7 @@ def XTEST : I<0x01, MRM_D6, (outs), (ins),
|
|||
|
||||
def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm),
|
||||
"xabort\t$imm",
|
||||
[(int_x86_xabort timm:$imm)]>, Requires<[HasRTM]>;
|
||||
[(int_x86_xabort imm:$imm)]>, Requires<[HasRTM]>;
|
||||
} // SchedRW
|
||||
|
||||
// HLE prefixes
|
||||
|
|
|
@ -143,13 +143,13 @@ multiclass xop3opimm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
(ins VR128:$src1, u8imm:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set VR128:$dst,
|
||||
(vt128 (OpNode (vt128 VR128:$src1), timm:$src2)))]>,
|
||||
(vt128 (OpNode (vt128 VR128:$src1), imm:$src2)))]>,
|
||||
XOP, Sched<[sched]>;
|
||||
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins i128mem:$src1, u8imm:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set VR128:$dst,
|
||||
(vt128 (OpNode (vt128 (load addr:$src1)), timm:$src2)))]>,
|
||||
(vt128 (OpNode (vt128 (load addr:$src1)), imm:$src2)))]>,
|
||||
XOP, Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
|
||||
|
@ -251,7 +251,7 @@ multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
|
|||
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
|
||||
[(set VR128:$dst,
|
||||
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
|
||||
timm:$cc)))]>,
|
||||
imm:$cc)))]>,
|
||||
XOP_4V, Sched<[sched]>;
|
||||
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i128mem:$src2, u8imm:$cc),
|
||||
|
@ -260,12 +260,12 @@ multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
|
|||
[(set VR128:$dst,
|
||||
(vt128 (OpNode (vt128 VR128:$src1),
|
||||
(vt128 (load addr:$src2)),
|
||||
timm:$cc)))]>,
|
||||
imm:$cc)))]>,
|
||||
XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
|
||||
def : Pat<(OpNode (load addr:$src2),
|
||||
(vt128 VR128:$src1), timm:$cc),
|
||||
(vt128 VR128:$src1), imm:$cc),
|
||||
(!cast<Instruction>(NAME#"mi") VR128:$src1, addr:$src2,
|
||||
(CommuteVPCOMCC imm:$cc))>;
|
||||
}
|
||||
|
@ -422,7 +422,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
|
|||
!strconcat(OpcodeStr,
|
||||
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
|
||||
[(set RC:$dst,
|
||||
(VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 timm:$src4))))]>,
|
||||
(VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 imm:$src4))))]>,
|
||||
Sched<[sched]>;
|
||||
def rm : IXOP5<Opc, MRMSrcMemOp4, (outs RC:$dst),
|
||||
(ins RC:$src1, RC:$src2, intmemop:$src3, u4imm:$src4),
|
||||
|
@ -430,7 +430,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
|
|||
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
|
||||
[(set RC:$dst,
|
||||
(VT (X86vpermil2 RC:$src1, RC:$src2, (IntLdFrag addr:$src3),
|
||||
(i8 timm:$src4))))]>, VEX_W,
|
||||
(i8 imm:$src4))))]>, VEX_W,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
|
||||
def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, fpmemop:$src2, RC:$src3, u4imm:$src4),
|
||||
|
@ -438,7 +438,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
|
|||
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
|
||||
[(set RC:$dst,
|
||||
(VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2),
|
||||
RC:$src3, (i8 timm:$src4))))]>,
|
||||
RC:$src3, (i8 imm:$src4))))]>,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold,
|
||||
// fpmemop:$src2
|
||||
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
|
||||
|
|
|
@ -389,7 +389,8 @@ define void @store(i64* %addr, i64 addrspace(42)* %addr42, i64 %val1, i64 %val2)
|
|||
; CHECK-LABEL: name: intrinsics
|
||||
; CHECK: [[CUR:%[0-9]+]]:_(s32) = COPY $w0
|
||||
; CHECK: [[BITS:%[0-9]+]]:_(s32) = COPY $w1
|
||||
; CHECK: [[PTR:%[0-9]+]]:_(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
|
||||
; CHECK: [[CREG:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; CHECK: [[PTR:%[0-9]+]]:_(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), [[CREG]]
|
||||
; CHECK: [[PTR_VEC:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.ptr.vec
|
||||
; CHECK: [[VEC:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_VEC]]
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), [[VEC]](<8 x s8>), [[VEC]](<8 x s8>), [[PTR]](p0)
|
||||
|
|
|
@ -10,20 +10,24 @@ body: |
|
|||
bb.0:
|
||||
liveins: $vgpr0
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:sgpr(s32) = G_CONSTANT i32 1
|
||||
%2:sgpr(s32) = G_CONSTANT i32 15
|
||||
%3:sgpr(s1) = G_CONSTANT i1 0
|
||||
%4:sgpr(s1) = G_CONSTANT i1 1
|
||||
|
||||
; CHECK: EXP 1, %0, %0, %0, %0, 0, 0, 15, implicit $exec
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp),1, 15, %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), 0, 0
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %1:sgpr(s32), %2:sgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %3:sgpr(s1), %3:sgpr(s1)
|
||||
|
||||
; CHECK: EXP_DONE 1, %0, %0, %0, %0, 0, 0, 15, implicit $exec
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 1, 15, %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), 1, 0
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %1:sgpr(s32), %2:sgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %4:sgpr(s1), %3:sgpr(s1)
|
||||
|
||||
%5:vgpr(<2 x s16>) = G_BITCAST %0(s32)
|
||||
|
||||
; CHECK: [[UNDEF0:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; CHECK: EXP 1, %0, %0, [[UNDEF0]], [[UNDEF0]], 0, 1, 15, implicit $exec
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 1, 15, %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), 0, 0
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), %1:sgpr(s32), %2:sgpr(s32), %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), %3:sgpr(s1), %3:sgpr(s1)
|
||||
|
||||
; CHECK: [[UNDEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; CHECK: EXP_DONE 1, %0, %0, [[UNDEF1]], [[UNDEF1]], 0, 1, 15, implicit $exec
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 1, 15, %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), 1, 0
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), %1:sgpr(s32), %2:sgpr(s32), %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), %4:sgpr(s1), %3:sgpr(s1)
|
||||
|
||||
|
|
|
@ -18,7 +18,8 @@ body: |
|
|||
; GCN: S_SENDMSG 1, implicit $exec, implicit $m0
|
||||
; GCN: S_ENDPGM 0
|
||||
%0:sgpr(s32) = COPY $sgpr0
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 1, %0(s32)
|
||||
%2:sgpr(s32) = G_CONSTANT i32 1
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), %2(s32), %0(s32)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
|
@ -1,15 +0,0 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -march=amdgcn -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - | FileCheck %s
|
||||
|
||||
declare void @llvm.amdgcn.s.sendmsg(i32 immarg, i32)
|
||||
|
||||
define amdgpu_ps void @test_sendmsg(i32 inreg %m0) {
|
||||
; CHECK-LABEL: name: test_sendmsg
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr0
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 12, [[COPY]](s32)
|
||||
; CHECK: S_ENDPGM
|
||||
call void @llvm.amdgcn.s.sendmsg(i32 12, i32 %m0)
|
||||
ret void
|
||||
}
|
|
@ -9,7 +9,10 @@ define amdgpu_ps void @disabled_input(float inreg %arg0, float %psinput0, float
|
|||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
|
||||
; CHECK: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), [[C]](s32), [[C1]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), [[C2]](s1), [[C2]](s1)
|
||||
; CHECK: S_ENDPGM 0
|
||||
main_body:
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0
|
||||
|
@ -24,14 +27,17 @@ define amdgpu_ps void @disabled_input_struct(float inreg %arg0, { float, float }
|
|||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
|
||||
; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
|
||||
; CHECK: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), [[C]](s32), [[C1]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), [[C2]](s1), [[C2]](s1)
|
||||
; CHECK: S_ENDPGM 0
|
||||
main_body:
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #0
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { "InitialPSInputAddr"="0x00002" }
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=irtranslator -global-isel %s -o - | FileCheck %s
|
||||
|
||||
|
||||
; CHECK-LABEL: name: test_f32_inreg
|
||||
; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr2
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[S0]]
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S0]]
|
||||
define amdgpu_vs void @test_f32_inreg(float inreg %arg0) {
|
||||
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
|
||||
ret void
|
||||
|
@ -10,7 +11,7 @@ define amdgpu_vs void @test_f32_inreg(float inreg %arg0) {
|
|||
|
||||
; CHECK-LABEL: name: test_f32
|
||||
; CHECK: [[V0:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[V0]]
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[V0]]
|
||||
define amdgpu_vs void @test_f32(float %arg0) {
|
||||
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
|
||||
ret void
|
||||
|
@ -32,7 +33,7 @@ define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(4)* inreg %arg0) {
|
|||
; CHECK: [[S4:%[0-9]+]]:_(s32) = COPY $sgpr4
|
||||
; CHECK: [[S34:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[S3]](s32), [[S4]](s32)
|
||||
; CHECK: G_LOAD [[S34]]
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[S2]](s32)
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S2]]
|
||||
define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)* inreg %arg1) {
|
||||
%tmp0 = load volatile i32, i32 addrspace(4)* %arg1
|
||||
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
|
||||
|
@ -44,7 +45,7 @@ define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)*
|
|||
; CHECK: [[S1:%[0-9]+]]:_(s32) = COPY $sgpr3
|
||||
; CHECK: [[V0:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; CHECK: [[V1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[V0]](s32), [[S0]](s32), [[V1]](s32), [[S1]](s32)
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[V0]](s32), [[S0]](s32), [[V1]](s32), [[S1]](s32)
|
||||
define amdgpu_vs void @test_order(float inreg %arg0, float inreg %arg1, float %arg2, float %arg3) {
|
||||
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg2, float %arg0, float %arg3, float %arg1, i1 false, i1 false) #0
|
||||
ret void
|
||||
|
|
|
@ -9,13 +9,14 @@ define amdgpu_ps void @test_div_scale(float %arg0, float %arg1) {
|
|||
; CHECK: liveins: $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; CHECK: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), -1
|
||||
; CHECK: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), [[C]](s1)
|
||||
; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[INT1]](s1)
|
||||
; CHECK: G_STORE [[INT]](s32), [[DEF]](p1) :: (store 4 into `float addrspace(1)* undef`, addrspace 1)
|
||||
; CHECK: G_STORE [[SEXT]](s32), [[DEF1]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
|
||||
; CHECK: S_ENDPGM 0
|
||||
; CHECK: S_ENDPGM
|
||||
%call = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %arg0, float %arg1, i1 true)
|
||||
%extract0 = extractvalue { float, i1 } %call, 0
|
||||
%extract1 = extractvalue { float, i1 } %call, 1
|
||||
|
|
|
@ -1,519 +0,0 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -o - %s | FileCheck -check-prefix=UNPACKED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=PACKED %s
|
||||
|
||||
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16(<4 x i32> inreg %rsrc, half %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16
|
||||
; UNPACKED: bb.1 (%ir-block.0):
|
||||
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: S_ENDPGM 0
|
||||
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16
|
||||
; PACKED: bb.1 (%ir-block.0):
|
||||
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16(<4 x i32> inreg %rsrc, half %val, i32 inreg %soffset) {
|
||||
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16
|
||||
; UNPACKED: bb.1 (%ir-block.0):
|
||||
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
|
||||
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7 + 4095, align 1, addrspace 4)
|
||||
; UNPACKED: S_ENDPGM 0
|
||||
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16
|
||||
; PACKED: bb.1 (%ir-block.0):
|
||||
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
|
||||
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7 + 4095, align 1, addrspace 4)
|
||||
; PACKED: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16
|
||||
; UNPACKED: bb.1 (%ir-block.0):
|
||||
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
|
||||
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec
|
||||
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: S_ENDPGM 0
|
||||
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16
|
||||
; PACKED: bb.1 (%ir-block.0):
|
||||
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16(<4 x i32> inreg %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
|
||||
; UNPACKED: bb.1 (%ir-block.0):
|
||||
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
|
||||
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
|
||||
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec
|
||||
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec
|
||||
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: S_ENDPGM 0
|
||||
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
|
||||
; PACKED: bb.1 (%ir-block.0):
|
||||
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
|
||||
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Make sure unpack code is emitted outside of loop
|
||||
define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16(<4 x i32> %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; UNPACKED-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
|
||||
; UNPACKED: bb.1 (%ir-block.0):
|
||||
; UNPACKED: successors: %bb.2(0x80000000)
|
||||
; UNPACKED: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
|
||||
; UNPACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; UNPACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
|
||||
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
|
||||
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
|
||||
; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2
|
||||
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
|
||||
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec
|
||||
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec
|
||||
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3
|
||||
; UNPACKED: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
|
||||
; UNPACKED: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
|
||||
; UNPACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; UNPACKED: bb.2:
|
||||
; UNPACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; UNPACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec
|
||||
; UNPACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec
|
||||
; UNPACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
|
||||
; UNPACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec
|
||||
; UNPACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec
|
||||
; UNPACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec
|
||||
; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
|
||||
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
|
||||
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; UNPACKED: bb.3:
|
||||
; UNPACKED: successors: %bb.4(0x80000000)
|
||||
; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; UNPACKED: bb.4:
|
||||
; UNPACKED: S_ENDPGM 0
|
||||
; PACKED-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
|
||||
; PACKED: bb.1 (%ir-block.0):
|
||||
; PACKED: successors: %bb.2(0x80000000)
|
||||
; PACKED: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
|
||||
; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; PACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
|
||||
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
|
||||
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
|
||||
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2
|
||||
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; PACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
|
||||
; PACKED: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
|
||||
; PACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; PACKED: bb.2:
|
||||
; PACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; PACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
|
||||
; PACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
|
||||
; PACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
|
||||
; PACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
|
||||
; PACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec
|
||||
; PACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec
|
||||
; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
|
||||
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec
|
||||
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; PACKED: bb.3:
|
||||
; PACKED: successors: %bb.4(0x80000000)
|
||||
; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; PACKED: bb.4:
|
||||
; PACKED: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095
|
||||
; UNPACKED: bb.1 (%ir-block.0):
|
||||
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
|
||||
; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
|
||||
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
|
||||
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec
|
||||
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: S_ENDPGM 0
|
||||
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095
|
||||
; PACKED: bb.1 (%ir-block.0):
|
||||
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096
|
||||
; UNPACKED: bb.1 (%ir-block.0):
|
||||
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
|
||||
; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
|
||||
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
|
||||
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec
|
||||
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: S_ENDPGM 0
|
||||
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096
|
||||
; PACKED: bb.1 (%ir-block.0):
|
||||
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16
|
||||
; UNPACKED: bb.1 (%ir-block.0):
|
||||
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
|
||||
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; UNPACKED: %11:vgpr_32, dead %21:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
|
||||
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec
|
||||
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: S_ENDPGM 0
|
||||
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16
|
||||
; PACKED: bb.1 (%ir-block.0):
|
||||
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
|
||||
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; PACKED: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 16
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095
|
||||
; UNPACKED: bb.1 (%ir-block.0):
|
||||
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095
|
||||
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; UNPACKED: %11:vgpr_32, dead %22:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
|
||||
; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
|
||||
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
|
||||
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec
|
||||
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: S_ENDPGM 0
|
||||
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095
|
||||
; PACKED: bb.1 (%ir-block.0):
|
||||
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095
|
||||
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; PACKED: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 4095
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096
|
||||
; UNPACKED: bb.1 (%ir-block.0):
|
||||
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
|
||||
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; UNPACKED: %11:vgpr_32, dead %22:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
|
||||
; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
|
||||
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
|
||||
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec
|
||||
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: S_ENDPGM 0
|
||||
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096
|
||||
; PACKED: bb.1 (%ir-block.0):
|
||||
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
|
||||
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; PACKED: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 4096
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; Check what happens with offset add inside a waterfall loop
|
||||
define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16_add_4096(<4 x i32> %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; UNPACKED-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16_add_4096
|
||||
; UNPACKED: bb.1 (%ir-block.0):
|
||||
; UNPACKED: successors: %bb.2(0x80000000)
|
||||
; UNPACKED: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
|
||||
; UNPACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; UNPACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
|
||||
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
|
||||
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
|
||||
; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2
|
||||
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
|
||||
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; UNPACKED: %13:vgpr_32, dead %47:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
|
||||
; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
|
||||
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
|
||||
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY4]], implicit $exec
|
||||
; UNPACKED: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
|
||||
; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY10]], [[COPY5]], implicit $exec
|
||||
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3
|
||||
; UNPACKED: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
|
||||
; UNPACKED: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
|
||||
; UNPACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; UNPACKED: bb.2:
|
||||
; UNPACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; UNPACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec
|
||||
; UNPACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec
|
||||
; UNPACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
|
||||
; UNPACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY11]], implicit $exec
|
||||
; UNPACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec
|
||||
; UNPACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec
|
||||
; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
|
||||
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec
|
||||
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; UNPACKED: bb.3:
|
||||
; UNPACKED: successors: %bb.4(0x80000000)
|
||||
; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; UNPACKED: bb.4:
|
||||
; UNPACKED: S_ENDPGM 0
|
||||
; PACKED-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16_add_4096
|
||||
; PACKED: bb.1 (%ir-block.0):
|
||||
; PACKED: successors: %bb.2(0x80000000)
|
||||
; PACKED: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
|
||||
; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; PACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
|
||||
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
|
||||
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
|
||||
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2
|
||||
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
|
||||
; PACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; PACKED: %13:vgpr_32, dead %31:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
|
||||
; PACKED: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
|
||||
; PACKED: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
|
||||
; PACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; PACKED: bb.2:
|
||||
; PACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; PACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec
|
||||
; PACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec
|
||||
; PACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
|
||||
; PACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec
|
||||
; PACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec
|
||||
; PACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec
|
||||
; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
|
||||
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY10]], implicit $exec
|
||||
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; PACKED: bb.3:
|
||||
; PACKED: successors: %bb.4(0x80000000)
|
||||
; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; PACKED: bb.4:
|
||||
; PACKED: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 4096
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.raw.buffer.store.format.f16(half, <4 x i32>, i32, i32, i32 immarg)
|
||||
declare void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg)
|
||||
declare void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half>, <4 x i32>, i32, i32, i32 immarg)
|
|
@ -1,314 +0,0 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -o - %s | FileCheck %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck %s
|
||||
|
||||
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f32(<4 x i32> inreg %rsrc, float %val, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f32
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32(<4 x i32> inreg %rsrc, <3 x float> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2
|
||||
; CHECK: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32(<4 x i32> inreg %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
|
||||
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32(<4 x i32> %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8
|
||||
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
|
||||
; CHECK: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
|
||||
; CHECK: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec
|
||||
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec
|
||||
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: bb.4:
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4095(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4095
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
|
||||
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4096(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4096
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
|
||||
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_16(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_16
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; CHECK: %13:vgpr_32, dead %15:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
|
||||
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 16
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4095(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4095
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; CHECK: %13:vgpr_32, dead %15:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
|
||||
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 4095
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Format store of a <2 x float> whose voffset is computed as %voffset + 4096.
; rsrc and soffset are inreg (copied from SGPRs); val and voffset come from
; VGPRs. The expected MIR keeps the add as a separate instruction: 4096 is
; materialized with S_MOV_B32, copied to a VGPR, added with V_ADD_I32_e64, and
; the sum is used as the address operand of BUFFER_STORE_FORMAT_XY_OFFEN_exact.
; All trailing immediate operands of the store stay 0.
; NOTE(review): the assertions hard-code %13/%15, so they depend on the exact
; virtual register numbering produced for this function.
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4096(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4096
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; CHECK: %13:vgpr_32, dead %15:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
|
||||
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 4096
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; Check what happens with offset add inside a waterfall loop
; Here %rsrc is not inreg, so it arrives in VGPRs, while the voffset is
; %voffset + 4096. The expected MIR computes the add (S_MOV_B32 4096, COPY,
; V_ADD_I32_e64) in bb.1, before the loop. bb.2 is the loop: it reads the four
; rsrc dwords with V_READFIRSTLANE_B32, compares each 64-bit half against the
; VGPR value (V_CMP_EQ_U64_e64), ANDs the two results, issues
; BUFFER_STORE_FORMAT_XYZW_OFFEN_exact with the SGPR copy of rsrc, then uses
; S_AND_SAVEEXEC_B64 / S_XOR_B64_term / S_CBRANCH_EXECNZ to branch back to
; bb.2 while $exec is non-zero. bb.3 restores $exec from the value saved in
; bb.1.
; NOTE(review): the assertions hard-code %15/%33, so they depend on the exact
; virtual register numbering produced for this function.
|
||||
define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32_add_4096(<4 x i32> %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32_add_4096
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8
|
||||
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
|
||||
; CHECK: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; CHECK: %15:vgpr_32, dead %33:sreg_64_xexec = V_ADD_I32_e64 [[COPY8]], [[COPY10]], 0, implicit $exec
|
||||
; CHECK: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
|
||||
; CHECK: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec
|
||||
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY11]], implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec
|
||||
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: bb.4:
|
||||
; CHECK: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 4096
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Declarations of the raw.buffer.store.format overloads for float and for
; 2-, 3- and 4-element float vectors. At the call sites in this file the
; operands are passed as (val, rsrc, voffset, soffset, <constant>); the
; trailing i32 carries the immarg attribute, so callers must pass an
; immediate constant there.
declare void @llvm.amdgcn.raw.buffer.store.format.f32(float, <4 x i32>, i32, i32, i32 immarg)
|
||||
declare void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float>, <4 x i32>, i32, i32, i32 immarg)
|
||||
declare void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float>, <4 x i32>, i32, i32, i32 immarg)
|
||||
declare void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg)
|
|
@ -1,791 +0,0 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
|
||||
; FIXME: Test with SI when argument lowering not broken for f16
|
||||
|
||||
; Natural mapping
; rsrc and soffset are inreg and are copied from SGPRs; val and voffset are
; copied from VGPRs. The expected MIR is a single basic block in which
; BUFFER_STORE_DWORD_OFFEN_exact uses those copies directly (no extra
; SGPR-to-VGPR copies and no loop), with all trailing immediates 0.
|
||||
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Copies for VGPR arguments
; Every argument is inreg, so val and voffset arrive in SGPRs ($sgpr6 and
; $sgpr7). The expected MIR copies both into vgpr_32 registers (COPY7, COPY8)
; and passes those copies as the first two operands of
; BUFFER_STORE_DWORD_OFFEN_exact; rsrc and soffset are used from SGPRs as-is.
|
||||
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, float inreg %val, i32 inreg %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr_soffset
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr6
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr7
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Waterfall for rsrc
; %rsrc is not inreg, so it arrives in VGPRs. The expected MIR saves $exec in
; bb.1 and then loops in bb.2: the four rsrc dwords are read with
; V_READFIRSTLANE_B32, each 64-bit half is compared against the VGPR value
; (V_CMP_EQ_U64_e64) and the results are ANDed; the store is issued with the
; SGPR REG_SEQUENCE built from the readfirstlane results; S_AND_SAVEEXEC_B64,
; S_XOR_B64_term and S_CBRANCH_EXECNZ repeat bb.2 while $exec is non-zero.
; bb.3 restores the saved $exec.
|
||||
define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
|
||||
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: bb.4:
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Waterfall for soffset
; %soffset is not inreg, so it arrives in a VGPR ($vgpr2) while rsrc stays in
; SGPRs. The expected loop in bb.2 reads soffset with a single
; V_READFIRSTLANE_B32, compares it against the VGPR value with
; V_CMP_EQ_U32_e64, and passes the readfirstlane result as the soffset
; operand of BUFFER_STORE_DWORD_OFFEN_exact. The loop back-edge and the $exec
; save/restore use S_MOV_B64_term, S_AND_SAVEEXEC_B64, S_XOR_B64_term and
; S_CBRANCH_EXECNZ.
|
||||
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
|
||||
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: bb.4:
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Waterfall for rsrc and soffset
; No argument is inreg, so both rsrc and soffset arrive in VGPRs. The expected
; MIR uses one loop (bb.2) for both: the rsrc halves are read with
; V_READFIRSTLANE_B32 and compared with V_CMP_EQ_U64_e64, soffset is read with
; a fifth V_READFIRSTLANE_B32 and compared with V_CMP_EQ_U32_e64, and all
; comparison results are combined with S_AND_B64 before S_AND_SAVEEXEC_B64.
; The store takes the SGPR rsrc REG_SEQUENCE and the readfirstlane'd soffset.
|
||||
define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, float %val, i32 %voffset, i32 %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
|
||||
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
|
||||
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
|
||||
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: bb.4:
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Passes 1 as the trailing constant argument of the store intrinsic. The
; expected BUFFER_STORE_DWORD_OFFEN_exact has trailing immediates
; "0, 1, 0, 0, 0", i.e. only the second one is set.
; NOTE(review): per the test name that operand is presumably glc - confirm
; against the instruction definition.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Passes 2 as the trailing constant argument of the store intrinsic. The
; expected BUFFER_STORE_DWORD_OFFEN_exact has trailing immediates
; "0, 0, 1, 0, 0", i.e. only the third one is set.
; NOTE(review): per the test name that operand is presumably slc - confirm
; against the instruction definition.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Passes 3 (1 | 2) as the trailing constant argument of the store intrinsic.
; The expected BUFFER_STORE_DWORD_OFFEN_exact has trailing immediates
; "0, 1, 1, 0, 0", i.e. the second and third are both set.
; NOTE(review): per the test name those operands are presumably glc and slc -
; confirm against the instruction definition.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 3)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Passes 4 as the trailing constant argument of the store intrinsic. The
; expected BUFFER_STORE_DWORD_OFFEN_exact has trailing immediates
; "0, 0, 0, 0, 1", i.e. only the last one is set.
; NOTE(review): per the test name that operand is presumably dlc - confirm
; against the instruction definition.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_dlc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_dlc
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Passes 6 (2 | 4) as the trailing constant argument of the store intrinsic.
; The expected BUFFER_STORE_DWORD_OFFEN_exact has trailing immediates
; "0, 0, 1, 0, 1", i.e. the third and the last are set.
; NOTE(review): per the test name those operands are presumably slc and dlc -
; confirm against the instruction definition.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc_dlc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc_dlc
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 1, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Passes 5 (1 | 4) as the trailing constant argument of the store intrinsic.
; The expected BUFFER_STORE_DWORD_OFFEN_exact has trailing immediates
; "0, 1, 0, 0, 1", i.e. the second and the last are set.
; NOTE(review): per the test name those operands are presumably glc and dlc -
; confirm against the instruction definition.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_dlc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_dlc
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 1, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Passes 7 (1 | 2 | 4) as the trailing constant argument of the store
; intrinsic. The expected BUFFER_STORE_DWORD_OFFEN_exact has trailing
; immediates "0, 1, 1, 0, 1", i.e. the second, third and last are set.
; NOTE(review): per the test name those operands are presumably glc, slc and
; dlc - confirm against the instruction definition.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc_dlc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc_dlc
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 1, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores a <2 x float> value. The two value dwords ($vgpr0, $vgpr1) are
; combined into a vreg_64 REG_SEQUENCE and the expected instruction is
; BUFFER_STORE_DWORDX2_OFFEN_exact with an 8-byte store memory operand.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores a <3 x float> value. The three value dwords ($vgpr0-$vgpr2) are
; combined into a vreg_96 REG_SEQUENCE and the expected instruction is
; BUFFER_STORE_DWORDX3_OFFEN_exact with a 12-byte store memory operand.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32(<4 x i32> inreg %rsrc, <3 x float> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2
|
||||
; CHECK: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores a <4 x float> value. The four value dwords ($vgpr0-$vgpr3) are
; combined into a vreg_128 REG_SEQUENCE and the expected instruction is
; BUFFER_STORE_DWORDX4_OFFEN_exact with a 16-byte store memory operand.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32(<4 x i32> inreg %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
|
||||
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Byte store. The value is passed as i32 and truncated to i8 in IR before the intrinsic call;
; the expected selection is BUFFER_STORE_BYTE_OFFEN_exact with a 1-byte memory operand, fed
; directly from the 32-bit VGPR copy of the argument.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i8(<4 x i32> inreg %rsrc, i32 %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i8
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom TargetCustom7, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%val.trunc = trunc i32 %val to i8
|
||||
call void @llvm.amdgcn.raw.buffer.store.i8(i8 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; 16-bit integer store. The value is passed as i32 and truncated to i16 in IR before the
; intrinsic call; the expected selection is BUFFER_STORE_SHORT_OFFEN_exact with a 2-byte
; memory operand, fed directly from the 32-bit VGPR copy of the argument.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i16(<4 x i32> inreg %rsrc, i32 %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i16
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%val.trunc = trunc i32 %val to i16
|
||||
call void @llvm.amdgcn.raw.buffer.store.i16(i16 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Half-precision store. The half value arrives in a single 32-bit VGPR and the expected
; selection is BUFFER_STORE_SHORT_OFFEN_exact with a 2-byte memory operand, i.e. the same
; instruction the i16 variant of this test expects.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16(<4 x i32> inreg %rsrc, half %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Packed <2 x half> store. Both halves arrive in a single VGPR ($vgpr0), so the expected
; selection is a plain BUFFER_STORE_DWORD_OFFEN_exact with a 4-byte memory operand.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Packed <4 x half> store. The value occupies two VGPRs ($vgpr0, $vgpr1) which are joined
; into a vreg_64; the expected selection is BUFFER_STORE_DWORDX2_OFFEN_exact with an 8-byte
; memory operand. %voffset therefore comes from $vgpr2.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16(<4 x i32> inreg %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Same <4 x half> store, but %rsrc is NOT inreg and so arrives in VGPRs ($vgpr0-$vgpr3).
; The store's resource operand is expected to be an sreg_128, so the expected output wraps
; the store in a loop (bb.2):
;   - V_READFIRSTLANE_B32 reads each rsrc dword into an SGPR,
;   - V_CMP_EQ_U64 / S_AND_B64 compute which lanes hold that same rsrc value,
;   - the store is issued with the SGPR copy of rsrc,
;   - S_AND_SAVEEXEC_B64 / S_XOR_B64_term update $exec and S_CBRANCH_EXECNZ branches back
;     to bb.2 while $exec is non-zero.
; $exec is saved before the loop (S_MOV_B64_term in bb.1) and restored in bb.3.
define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16(<4 x i32> %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
|
||||
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
|
||||
; CHECK: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
|
||||
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec
|
||||
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: bb.4:
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Constant voffset of 4095. No register is materialized for it: the store's voffset operand
; is $noreg and the constant appears as the immediate operand following soffset (presumably
; the instruction's immediate offset field). The memory operand records the "+ 4095" offset.
; NOTE(review): the OFFEN form is still expected even though its voffset register is $noreg;
; confirm that this is the intended selection rather than an OFFSET form.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4095(<4 x i32> inreg %rsrc, float %val, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4095
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Constant voffset of 4096, one past the value used by the voffset4095 test. Here the
; constant is NOT placed in the store's immediate operand: it is materialized into a VGPR
; with V_MOV_B32_e32 and passed as the voffset register, and the immediate operand is 0.
; The memory operand still records the "+ 4096" offset.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4096(<4 x i32> inreg %rsrc, float %val, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4096
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4096, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; voffset is a VGPR value plus the constant 16. The expected output performs the add
; explicitly (S_MOV_B32 16, COPY to a VGPR, V_ADD_I32_e64) and passes the sum as the voffset
; register; the store's immediate operand after soffset stays 0.
; NOTE(review): the constant is not folded into the store's immediate operand here, unlike
; the constant-only voffset4095 test; looks like a not-yet-implemented fold, confirm.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_16(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_16
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 16
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; voffset is a VGPR value plus the constant 4095. The expected output performs the add
; explicitly (S_MOV_B32 4095, COPY to a VGPR, V_ADD_I32_e64) and passes the sum as the
; voffset register; the store's immediate operand after soffset stays 0.
; NOTE(review): the constant is not folded into the store's immediate operand here, unlike
; the constant-only voffset4095 test; looks like a not-yet-implemented fold, confirm.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4095(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4095
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 4095
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; voffset is a VGPR value plus the constant 4096. The expected output performs the add
; explicitly (S_MOV_B32 4096, COPY to a VGPR, V_ADD_I32_e64) and passes the sum as the
; voffset register; the store's immediate operand after soffset stays 0. The expected
; sequence is identical in shape to the add_16 and add_4095 variants of this test.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4096(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4096
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 4096
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Constant soffset of 4095. The constant is materialized with S_MOV_B32 and passed as the
; store's soffset register; the immediate operand after soffset stays 0.
; Note that the %soffset argument is declared but unused: the call passes the literal 4095
; instead, so $sgpr6 appears in liveins but is never copied.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Constant soffset of 4096. As in the soffset4095 variant, the constant is materialized
; with S_MOV_B32 and passed as the store's soffset register; the immediate operand after
; soffset stays 0.
; Note that the %soffset argument is declared but unused: the call passes the literal 4096
; instead, so $sgpr6 appears in liveins but is never copied.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; <2 x half> store with 16 added to an offset operand.
; NOTE(review): despite "soffset_add_16" in the name, the IR adds the constant to %voffset
; (%voffset.add) and passes %soffset through unchanged, so this exercises the same path as
; the f32 voffset_add_16 test: S_MOV_B32 16, COPY to a VGPR, V_ADD_I32_e64, with the sum used
; as the voffset register. Confirm whether an add on %soffset was intended.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_16(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_16
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 16
|
||||
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; <2 x half> store with 4095 added to an offset operand.
; NOTE(review): despite "soffset_add_4095" in the name, the IR adds the constant to %voffset
; (%voffset.add) and passes %soffset through unchanged. The expected output is S_MOV_B32 4095,
; COPY to a VGPR, V_ADD_I32_e64, with the sum used as the voffset register. Confirm whether
; an add on %soffset was intended.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4095(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4095
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 4095
|
||||
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; <2 x half> store with 4096 added to an offset operand.
; NOTE(review): despite "soffset_add_4096" in the name, the IR adds the constant to %voffset
; (%voffset.add) and passes %soffset through unchanged. The expected output is S_MOV_B32 4096,
; COPY to a VGPR, V_ADD_I32_e64, with the sum used as the voffset register. Confirm whether
; an add on %soffset was intended.
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4096(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4096
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 4096
|
||||
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; An add of the offset is necessary, with a waterfall loop. Make sure the add is done outside of the waterfall loop.
; Here %rsrc is in VGPRs (not inreg), so the store is wrapped in the readfirstlane loop
; (bb.2), and %voffset has 5000 added to it in IR. The expected output places the
; S_MOV_B32 5000 / COPY / V_ADD_I32_e64 sequence in bb.1, before the loop is entered, and
; only reuses the sum (%11) inside bb.2.
|
||||
define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_offset_add_5000(<4 x i32> %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_offset_add_5000
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 5000
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; CHECK: %11:vgpr_32, dead %29:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
|
||||
; CHECK: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec
|
||||
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: bb.4:
|
||||
; CHECK: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 5000
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; A constant voffset of 5000, with a waterfall loop. No add is expected here: the constant is
; split into a 4096 part, materialized into a VGPR with V_MOV_B32_e32, and a 904 part placed in
; the store's immediate operand (4096 + 904 = 5000). Make sure the V_MOV_B32_e32 is done
; outside of the waterfall loop (in bb.1); only the store itself is inside bb.2.
|
||||
define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr_soffset_offset(<4 x i32> %rsrc, float %val, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr_soffset_offset
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
|
||||
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 5000, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: bb.4:
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 5000, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Declarations of the llvm.amdgcn.raw.buffer.store overloads called by the tests in this
; file, grouped as sub-dword integer, half-based, and float-based value types.
; Operand order at every call site is (value, rsrc, voffset, soffset, <i32 immarg>); the
; final operand is marked immarg and every visible call passes the literal 0 for it.
declare void @llvm.amdgcn.raw.buffer.store.i8(i8, <4 x i32>, i32, i32, i32 immarg)
|
||||
declare void @llvm.amdgcn.raw.buffer.store.i16(i16, <4 x i32>, i32, i32, i32 immarg)
|
||||
|
||||
declare void @llvm.amdgcn.raw.buffer.store.f16(half, <4 x i32>, i32, i32, i32 immarg)
|
||||
declare void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg)
|
||||
declare void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32 immarg)
|
||||
|
||||
declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg)
|
||||
declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32 immarg)
|
||||
declare void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32 immarg)
|
||||
declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg)
|
|
@ -1,45 +0,0 @@
|
|||
; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
declare void @llvm.amdgcn.s.sleep(i32) #0
|
||||
|
||||
; GCN-LABEL: {{^}}test_s_sleep:
|
||||
; GCN: s_sleep 0{{$}}
|
||||
; GCN: s_sleep 1{{$}}
|
||||
; GCN: s_sleep 2{{$}}
|
||||
; GCN: s_sleep 3{{$}}
|
||||
; GCN: s_sleep 4{{$}}
|
||||
; GCN: s_sleep 5{{$}}
|
||||
; GCN: s_sleep 6{{$}}
|
||||
; GCN: s_sleep 7{{$}}
|
||||
; GCN: s_sleep 8{{$}}
|
||||
; GCN: s_sleep 9{{$}}
|
||||
; GCN: s_sleep 10{{$}}
|
||||
; GCN: s_sleep 11{{$}}
|
||||
; GCN: s_sleep 12{{$}}
|
||||
; GCN: s_sleep 13{{$}}
|
||||
; GCN: s_sleep 14{{$}}
|
||||
; GCN: s_sleep 15{{$}}
|
||||
define amdgpu_kernel void @test_s_sleep(i32 %x) #0 {
|
||||
call void @llvm.amdgcn.s.sleep(i32 0)
|
||||
call void @llvm.amdgcn.s.sleep(i32 1)
|
||||
call void @llvm.amdgcn.s.sleep(i32 2)
|
||||
call void @llvm.amdgcn.s.sleep(i32 3)
|
||||
call void @llvm.amdgcn.s.sleep(i32 4)
|
||||
call void @llvm.amdgcn.s.sleep(i32 5)
|
||||
call void @llvm.amdgcn.s.sleep(i32 6)
|
||||
call void @llvm.amdgcn.s.sleep(i32 7)
|
||||
|
||||
; Values that might only work on VI
|
||||
call void @llvm.amdgcn.s.sleep(i32 8)
|
||||
call void @llvm.amdgcn.s.sleep(i32 9)
|
||||
call void @llvm.amdgcn.s.sleep(i32 10)
|
||||
call void @llvm.amdgcn.s.sleep(i32 11)
|
||||
call void @llvm.amdgcn.s.sleep(i32 12)
|
||||
call void @llvm.amdgcn.s.sleep(i32 13)
|
||||
call void @llvm.amdgcn.s.sleep(i32 14)
|
||||
call void @llvm.amdgcn.s.sleep(i32 15)
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
|
@ -23,20 +23,28 @@ body: |
|
|||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
|
||||
; CHECK-LABEL: name: exp_s
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
|
||||
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
|
||||
; CHECK: [[C2:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 false
|
||||
; CHECK: [[C3:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 false
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32)
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 0, [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0, 0
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $sgpr1
|
||||
%2:_(s32) = COPY $sgpr2
|
||||
%3:_(s32) = COPY $sgpr3
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), 0, 0, %0, %1, %2, %3, 0, 0
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), [[C]](s32), [[C1]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[C2]](s1), [[C3]](s1)
|
||||
%0:_(s32) = G_CONSTANT i32 0
|
||||
%1:_(s32) = G_CONSTANT i32 0
|
||||
%2:_(s32) = COPY $sgpr0
|
||||
%3:_(s32) = COPY $sgpr1
|
||||
%4:_(s32) = COPY $sgpr2
|
||||
%5:_(s32) = COPY $sgpr3
|
||||
%6:_(s1) = G_CONSTANT i1 0
|
||||
%7:_(s1) = G_CONSTANT i1 0
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), %0, %1, %2, %3, %4, %5, %6, %7
|
||||
...
|
||||
---
|
||||
name: exp_v
|
||||
|
@ -46,14 +54,22 @@ body: |
|
|||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
|
||||
; CHECK-LABEL: name: exp_v
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
|
||||
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 0, [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), 0, 0
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s32) = COPY $vgpr2
|
||||
%3:_(s32) = COPY $vgpr3
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), 0, 0, %0, %1, %2, %3, 0, 0
|
||||
; CHECK: [[C2:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 false
|
||||
; CHECK: [[C3:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 false
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), [[C]](s32), [[C1]](s32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[C2]](s1), [[C3]](s1)
|
||||
%0:_(s32) = G_CONSTANT i32 0
|
||||
%1:_(s32) = G_CONSTANT i32 0
|
||||
%2:_(s32) = COPY $vgpr0
|
||||
%3:_(s32) = COPY $vgpr1
|
||||
%4:_(s32) = COPY $vgpr2
|
||||
%5:_(s32) = COPY $vgpr3
|
||||
%6:_(s1) = G_CONSTANT i1 0
|
||||
%7:_(s1) = G_CONSTANT i1 0
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), %0, %1, %2, %3, %4, %5, %6, %7
|
||||
...
|
||||
|
|
|
@ -1,21 +0,0 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
name: ds_swizzle_s
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0
|
||||
|
||||
; CHECK-LABEL: name: ds_swizzle_s
|
||||
; CHECK: liveins: $sgpr0
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), [[COPY1]](s32), 0
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), %0, 0
|
||||
|
||||
...
|
|
@ -1,181 +0,0 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
|
||||
|
||||
; Natural mapping
|
||||
define amdgpu_ps void @load_1d_vgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 %s) {
|
||||
; CHECK-LABEL: name: load_1d_vgpr_vaddr__sgpr_srsrc
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
|
||||
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
|
||||
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
|
||||
; CHECK: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
|
||||
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY9]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
store <4 x float> %v, <4 x float> addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; Copy needed for VGPR argument
|
||||
define amdgpu_ps void @load_1d_sgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 inreg %s) {
|
||||
; CHECK-LABEL: name: load_1d_sgpr_vaddr__sgpr_srsrc
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
|
||||
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
|
||||
; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
|
||||
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
|
||||
; CHECK: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
|
||||
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY10]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
store <4 x float> %v, <4 x float> addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; Waterfall loop needed for rsrc
|
||||
define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) {
|
||||
; CHECK-LABEL: name: load_1d_vgpr_vaddr__vgpr_srsrc
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
|
||||
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
|
||||
; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %19, %bb.2
|
||||
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %12(<4 x s32>), %bb.2
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
|
||||
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
|
||||
; CHECK: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
|
||||
; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: bb.4:
|
||||
; CHECK: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
|
||||
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY9]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
store <4 x float> %v, <4 x float> addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; Waterfall loop needed for rsrc, copy needed for vaddr
|
||||
define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg %s) {
|
||||
; CHECK-LABEL: name: load_1d_sgpr_vaddr__vgpr_srsrc
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
|
||||
; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
|
||||
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32)
|
||||
; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
|
||||
; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %20, %bb.2
|
||||
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %12(<4 x s32>), %bb.2
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
|
||||
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
|
||||
; CHECK: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
|
||||
; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: bb.4:
|
||||
; CHECK: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
|
||||
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY10]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
store <4 x float> %v, <4 x float> addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
|
||||
|
||||
attributes #0 = { nounwind readonly }
|
|
@ -1,268 +0,0 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
|
||||
|
||||
; Natural mapping
|
||||
define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
||||
; CHECK-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
|
||||
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
|
||||
; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
|
||||
; CHECK: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11
|
||||
; CHECK: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12
|
||||
; CHECK: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13
|
||||
; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
|
||||
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
|
||||
; CHECK: [[COPY13:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
|
||||
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY13]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
|
||||
store <4 x float> %v, <4 x float> addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; Copy required for VGPR input
|
||||
define amdgpu_ps void @sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float inreg %s) {
|
||||
; CHECK-LABEL: name: sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
|
||||
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
|
||||
; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
|
||||
; CHECK: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11
|
||||
; CHECK: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12
|
||||
; CHECK: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13
|
||||
; CHECK: [[COPY12:%[0-9]+]]:sgpr(s32) = COPY $sgpr14
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
|
||||
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
|
||||
; CHECK: [[COPY14:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
|
||||
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY14]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
|
||||
store <4 x float> %v, <4 x float> addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; Waterfall loop for rsrc
|
||||
define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsrc, <4 x i32> inreg %samp, float %s) {
|
||||
; CHECK-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
|
||||
; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
|
||||
; CHECK: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
|
||||
; CHECK: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
|
||||
; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
|
||||
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
|
||||
; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %24, %bb.2
|
||||
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
|
||||
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
|
||||
; CHECK: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
|
||||
; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
|
||||
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: bb.4:
|
||||
; CHECK: [[COPY13:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
|
||||
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY13]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
|
||||
store <4 x float> %v, <4 x float> addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; Waterfall loop for sampler
|
||||
define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inreg %rsrc, <4 x i32> %samp, float %s) {
|
||||
; CHECK-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
|
||||
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
|
||||
; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
|
||||
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
|
||||
; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %24, %bb.2
|
||||
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: bb.4:
|
||||
; CHECK: [[COPY13:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
|
||||
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY13]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
|
||||
store <4 x float> %v, <4 x float> addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; Waterfall loop for rsrc and sampler
|
||||
define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsrc, <4 x i32> %samp, float %s) {
|
||||
; CHECK-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
|
||||
; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr9
|
||||
; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr10
|
||||
; CHECK: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr11
|
||||
; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
|
||||
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
|
||||
; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
|
||||
; CHECK: [[UV4:%[0-9]+]]:vreg_64(s64), [[UV5:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %24, %bb.2
|
||||
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
|
||||
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
|
||||
; CHECK: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
|
||||
; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
|
||||
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
|
||||
; CHECK: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV4:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_4:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV4]](s64), [[UV4]](s64), implicit $exec
|
||||
; CHECK: [[S_AND_B64_3:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_4]], [[S_AND_B64_2]], implicit-def $scc
|
||||
; CHECK: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV5:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV5]](s64), [[UV5]](s64), implicit $exec
|
||||
; CHECK: [[S_AND_B64_4:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_5]], [[S_AND_B64_3]], implicit-def $scc
|
||||
; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_4]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: bb.4:
|
||||
; CHECK: [[COPY13:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
|
||||
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY13]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
|
||||
store <4 x float> %v, <4 x float> addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
|
||||
|
||||
attributes #0 = { nounwind readonly }
|
|
@ -1,173 +0,0 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
|
||||
|
||||
; Natural mapping
|
||||
define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: $vgpr0 = COPY [[INT]](s32)
|
||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret float %val
|
||||
}
|
||||
|
||||
; Copies for VGPR arguments
|
||||
define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 inreg %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_load__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr_soffset
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: $vgpr0 = COPY [[INT]](s32)
|
||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret float %val
|
||||
}
|
||||
|
||||
; Waterfall for rsrc
|
||||
define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
|
||||
; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
|
||||
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %15, %bb.2
|
||||
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %9(s32), %bb.2
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: bb.4:
|
||||
; CHECK: $vgpr0 = COPY [[INT]](s32)
|
||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret float %val
|
||||
}
|
||||
|
||||
; Waterfall for soffset
|
||||
define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
|
||||
; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
|
||||
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %15, %bb.2
|
||||
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %9(s32), %bb.2
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
|
||||
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY5]](s32), implicit $exec
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: bb.4:
|
||||
; CHECK: $vgpr0 = COPY [[INT]](s32)
|
||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret float %val
|
||||
}
|
||||
|
||||
; Waterfall for rsrc and soffset
|
||||
define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) {
|
||||
; CHECK-LABEL: name: raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr5
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
|
||||
; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
|
||||
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %15, %bb.2
|
||||
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %9(s32), %bb.2
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
|
||||
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]](s32), implicit $exec
|
||||
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: bb.4:
|
||||
; CHECK: $vgpr0 = COPY [[INT]](s32)
|
||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret float %val
|
||||
}
|
||||
|
||||
declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32 immarg)
|
|
@ -1,179 +0,0 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
|
||||
|
||||
; Natural mapping
|
||||
define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: $vgpr0 = COPY [[INT]](s32)
|
||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret float %val
|
||||
}
|
||||
|
||||
; Copies for VGPR arguments
|
||||
define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__sgpr_val__sgpr_vindex__sgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 inreg %vindex, i32 inreg %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: struct_buffer_load__sgpr_rsrc__sgpr_val__sgpr_vindex__sgpr_voffset__sgpr_soffset
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32)
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY8]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: $vgpr0 = COPY [[INT]](s32)
|
||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret float %val
|
||||
}
|
||||
|
||||
; Waterfall for rsrc
|
||||
define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
|
||||
; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
|
||||
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %16, %bb.2
|
||||
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: bb.4:
|
||||
; CHECK: $vgpr0 = COPY [[INT]](s32)
|
||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret float %val
|
||||
}
|
||||
|
||||
; Waterfall for soffset
|
||||
define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex_vgpr_voffset__vgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 %soffset) {
|
||||
; CHECK-LABEL: name: struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex_vgpr_voffset__vgpr_soffset
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
|
||||
; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
|
||||
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %16, %bb.2
|
||||
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
|
||||
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY6]](s32), implicit $exec
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: bb.4:
|
||||
; CHECK: $vgpr0 = COPY [[INT]](s32)
|
||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret float %val
|
||||
}
|
||||
|
||||
; Waterfall for rsrc and soffset
|
||||
define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset) {
|
||||
; CHECK-LABEL: name: struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr6
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
|
||||
; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
|
||||
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %16, %bb.2
|
||||
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
|
||||
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]](s32), implicit $exec
|
||||
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: bb.4:
|
||||
; CHECK: $vgpr0 = COPY [[INT]](s32)
|
||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret float %val
|
||||
}
|
||||
|
||||
declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32 immarg)
|
|
@ -1,174 +0,0 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
|
||||
|
||||
; Natural mapping
|
||||
define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, float %val, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Copies for VGPR arguments
|
||||
define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__sgpr_val__sgpr_vindex__sgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, float inreg %val, i32 inreg %vindex, i32 inreg %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: struct_buffer_store__sgpr_rsrc__sgpr_val__sgpr_vindex__sgpr_voffset__sgpr_soffset
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
|
||||
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32)
|
||||
; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32)
|
||||
; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[COPY6]](s32)
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY8]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY9]](s32), [[COPY10]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Waterfall for rsrc
|
||||
define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, float %val, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
|
||||
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
|
||||
; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.1, %14, %bb.2
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: bb.4:
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Waterfall for soffset
|
||||
define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset(<4 x i32> inreg %rsrc, float %val, i32 %vindex, i32 %voffset, i32 %soffset) {
|
||||
; CHECK-LABEL: name: struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr3
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
|
||||
; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.1, %14, %bb.2
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
|
||||
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY7]](s32), implicit $exec
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: bb.4:
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Waterfall for rsrc and soffset
|
||||
define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, float %val, i32 %vindex, i32 %voffset, i32 %soffset) {
|
||||
; CHECK-LABEL: name: struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr7
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
|
||||
; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.1, %14, %bb.2
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
|
||||
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
|
||||
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
|
||||
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]](s32), implicit $exec
|
||||
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
|
||||
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x80000000)
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: bb.4:
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.struct.buffer.store.f32(float, <4 x i32>, i32, i32, i32, i32 immarg)
|
|
@ -1,9 +1,5 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
|
||||
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
|
||||
|
||||
---
|
||||
name: smulh_s32_ss
|
||||
|
@ -12,16 +8,10 @@ legalized: true
|
|||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1
|
||||
|
||||
; GFX6-LABEL: name: smulh_s32_ss
|
||||
; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||
; GFX6: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; GFX6: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY2]]
|
||||
; GFX9-LABEL: name: smulh_s32_ss
|
||||
; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||
; GFX9: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
|
||||
; CHECK-LABEL: name: smulh_s32_ss
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||
; CHECK: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $sgpr1
|
||||
%2:_(s32) = G_SMULH %0, %1
|
||||
|
@ -34,15 +24,10 @@ legalized: true
|
|||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: smulh_s32_sv
|
||||
; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; GFX6: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
|
||||
; GFX9-LABEL: name: smulh_s32_sv
|
||||
; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; GFX9: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
|
||||
; CHECK-LABEL: name: smulh_s32_sv
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $vgpr0
|
||||
%2:_(s32) = G_SMULH %0, %1
|
||||
|
@ -55,17 +40,11 @@ legalized: true
|
|||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: smulh_s32_vs
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; GFX6: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; GFX6: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY2]]
|
||||
; GFX9-LABEL: name: smulh_s32_vs
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; GFX9: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY2]]
|
||||
; CHECK-LABEL: name: smulh_s32_vs
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY2]]
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $sgpr0
|
||||
%2:_(s32) = G_SMULH %0, %1
|
||||
|
@ -78,15 +57,10 @@ legalized: true
|
|||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
; GFX6-LABEL: name: smulh_s32_vv
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; GFX6: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
|
||||
; GFX9-LABEL: name: smulh_s32_vv
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; GFX9: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
|
||||
; CHECK-LABEL: name: smulh_s32_vv
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s32) = G_SMULH %0, %1
|
||||
|
|
|
@ -1,9 +1,5 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
|
||||
# XUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
|
||||
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# XUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
|
||||
|
||||
---
|
||||
name: umulh_s32_ss
|
||||
|
@ -12,16 +8,10 @@ legalized: true
|
|||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1
|
||||
|
||||
; GFX6-LABEL: name: umulh_s32_ss
|
||||
; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||
; GFX6: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; GFX6: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY2]]
|
||||
; GFX9-LABEL: name: umulh_s32_ss
|
||||
; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||
; GFX9: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
|
||||
; CHECK-LABEL: name: umulh_s32_ss
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||
; CHECK: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $sgpr1
|
||||
%2:_(s32) = G_UMULH %0, %1
|
||||
|
@ -34,15 +24,10 @@ legalized: true
|
|||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: umulh_s32_sv
|
||||
; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; GFX6: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
|
||||
; GFX9-LABEL: name: umulh_s32_sv
|
||||
; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; GFX9: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
|
||||
; CHECK-LABEL: name: umulh_s32_sv
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $vgpr0
|
||||
%2:_(s32) = G_UMULH %0, %1
|
||||
|
@ -55,17 +40,11 @@ legalized: true
|
|||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: umulh_s32_vs
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; GFX6: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; GFX6: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY2]]
|
||||
; GFX9-LABEL: name: umulh_s32_vs
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; GFX9: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY2]]
|
||||
; CHECK-LABEL: name: umulh_s32_vs
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY2]]
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $sgpr0
|
||||
%2:_(s32) = G_UMULH %0, %1
|
||||
|
@ -78,15 +57,10 @@ legalized: true
|
|||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
; GFX6-LABEL: name: umulh_s32_vv
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; GFX6: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
|
||||
; GFX9-LABEL: name: umulh_s32_vv
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; GFX9: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
|
||||
; CHECK-LABEL: name: umulh_s32_vv
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s32) = G_UMULH %0, %1
|
||||
|
|
|
@ -1,70 +0,0 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck %s
|
||||
|
||||
# This would assert that a dead def should have no uses, but the dead
|
||||
# def and use have different subreg indices.
|
||||
|
||||
---
|
||||
name: multi_def_dead_reg_subreg_check
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
isEntryFunction: true
|
||||
scratchRSrcReg: '$sgpr24_sgpr25_sgpr26_sgpr27'
|
||||
scratchWaveOffsetReg: '$sgpr32'
|
||||
frameOffsetReg: '$sgpr32'
|
||||
stackPtrOffsetReg: '$sgpr32'
|
||||
argumentInfo:
|
||||
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
|
||||
privateSegmentWaveByteOffset: { reg: '$sgpr33' }
|
||||
body: |
|
||||
; CHECK-LABEL: name: multi_def_dead_reg_subreg_check
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $sgpr6_sgpr7
|
||||
; CHECK: undef %0.sub3:vreg_512 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 0, [[V_MOV_B32_e32_]], implicit $exec
|
||||
; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK: [[COPY:%[0-9]+]]:vreg_512 = COPY %0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN %0.sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, 0, 0, implicit $exec :: (store 4, align 8, addrspace 5)
|
||||
; CHECK: dead %6:vgpr_32 = DS_READ_B32_gfx9 undef %7:vgpr_32, 0, 0, implicit $exec
|
||||
; CHECK: dead %8:vreg_64 = DS_READ_B64_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec
|
||||
; CHECK: dead %9:vreg_128 = DS_READ_B128_gfx9 [[V_ADD_U32_e32_]], 0, 0, implicit $exec
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; CHECK: undef %11.sub1:vreg_512 = COPY [[COPY]].sub1
|
||||
; CHECK: INLINEASM &"", 1, 851978, def dead [[COPY1]], 851978, def dead [[COPY]].sub1, 2147483657, [[COPY1]], 2147549193, [[COPY]].sub1
|
||||
; CHECK: %11.sub0:vreg_512 = COPY [[COPY]].sub0
|
||||
; CHECK: %11.sub3:vreg_512 = COPY [[COPY]].sub3
|
||||
; CHECK: dead %10:vgpr_32 = V_ADD_I32_e32 4, [[V_MOV_B32_e32_1]], implicit-def dead $vcc, implicit $exec
|
||||
; CHECK: %11.sub2:vreg_512 = COPY undef [[V_MOV_B32_e32_]]
|
||||
; CHECK: %11.sub5:vreg_512 = COPY undef [[V_MOV_B32_e32_]]
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vreg_512 = COPY %11
|
||||
; CHECK: S_BRANCH %bb.1
|
||||
bb.0:
|
||||
liveins: $sgpr6_sgpr7
|
||||
|
||||
undef %0.sub3:vreg_512 = V_MOV_B32_e32 0, implicit $exec
|
||||
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
%2:vgpr_32 = V_ADD_U32_e32 0, %1, implicit $exec
|
||||
%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
%4:vreg_512 = COPY %0
|
||||
|
||||
bb.1:
|
||||
BUFFER_STORE_DWORD_OFFEN %0.sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, 0, 0, implicit $exec :: (store 4, align 8, addrspace 5)
|
||||
%6:vgpr_32 = DS_READ_B32_gfx9 undef %7:vgpr_32, 0, 0, implicit $exec
|
||||
%8:vreg_64 = DS_READ_B64_gfx9 %1, 0, 0, implicit $exec
|
||||
%9:vreg_128 = DS_READ_B128_gfx9 %2, 0, 0, implicit $exec
|
||||
%10:vgpr_32 = V_ADD_I32_e32 4, %3, implicit-def dead $vcc, implicit $exec
|
||||
undef %11.sub0:vreg_512 = COPY %4.sub0
|
||||
%12:vgpr_32 = COPY %4.sub0
|
||||
%11.sub1:vreg_512 = COPY %4.sub1
|
||||
INLINEASM &"", 1, 851978, def dead %12, 851978, def dead %4.sub1, 2147483657, %12, 2147549193, %4.sub1
|
||||
%11.sub2:vreg_512 = COPY undef %1
|
||||
%11.sub3:vreg_512 = COPY %4.sub3
|
||||
%11.sub5:vreg_512 = COPY undef %1
|
||||
%4:vreg_512 = COPY %11
|
||||
S_BRANCH %bb.1
|
||||
|
||||
...
|
|
@ -1,31 +0,0 @@
|
|||
// RUN: llvm-tblgen -gen-global-isel -optimize-match-table=false -I %p/Common -I %p/../../include %s -o - < %s | FileCheck -check-prefix=GISEL %s
|
||||
|
||||
include "llvm/Target/Target.td"
|
||||
include "GlobalISelEmitterCommon.td"
|
||||
|
||||
let TargetPrefix = "mytarget" in {
|
||||
def int_mytarget_sleep0 : Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]>;
|
||||
def int_mytarget_sleep1 : Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]>;
|
||||
}
|
||||
|
||||
// GISEL: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS,
|
||||
// GISEL-NEXT: // MIs[0] Operand 0
|
||||
// GISEL-NEXT: GIM_CheckIntrinsicID, /*MI*/0, /*Op*/0, Intrinsic::mytarget_sleep0,
|
||||
// GISEL-NEXT: // MIs[0] src
|
||||
// GISEL-NEXT: GIM_CheckIsImm, /*MI*/0, /*Op*/1,
|
||||
// GISEL-NEXT: // (intrinsic_void {{[0-9]+}}:{ *:[iPTR] }, (timm:{ *:[i32] }):$src) => (SLEEP0 (timm:{ *:[i32] }):$src)
|
||||
// GISEL-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/MyTarget::SLEEP0,
|
||||
// GISEL-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/1, // src
|
||||
def SLEEP0 : I<(outs), (ins i32imm:$src),
|
||||
[(int_mytarget_sleep0 timm:$src)]
|
||||
>;
|
||||
|
||||
// Test for situation which was crashing in ARM patterns.
|
||||
def p_imm : Operand<i32>;
|
||||
def SLEEP1 : I<(outs), (ins p_imm:$src), []>;
|
||||
|
||||
// FIXME: This should not crash, but should it work or be an error?
|
||||
// def : Pat <
|
||||
// (int_mytarget_sleep1 timm:$src),
|
||||
// (SLEEP1 imm:$src)
|
||||
// >;
|
|
@ -1062,7 +1062,6 @@ public:
|
|||
IPM_Opcode,
|
||||
IPM_NumOperands,
|
||||
IPM_ImmPredicate,
|
||||
IPM_Imm,
|
||||
IPM_AtomicOrderingMMO,
|
||||
IPM_MemoryLLTSize,
|
||||
IPM_MemoryVsLLTSize,
|
||||
|
@ -1341,23 +1340,6 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
class ImmOperandMatcher : public OperandPredicateMatcher {
|
||||
public:
|
||||
ImmOperandMatcher(unsigned InsnVarID, unsigned OpIdx)
|
||||
: OperandPredicateMatcher(IPM_Imm, InsnVarID, OpIdx) {}
|
||||
|
||||
static bool classof(const PredicateMatcher *P) {
|
||||
return P->getKind() == IPM_Imm;
|
||||
}
|
||||
|
||||
void emitPredicateOpcodes(MatchTable &Table,
|
||||
RuleMatcher &Rule) const override {
|
||||
Table << MatchTable::Opcode("GIM_CheckIsImm") << MatchTable::Comment("MI")
|
||||
<< MatchTable::IntValue(InsnVarID) << MatchTable::Comment("Op")
|
||||
<< MatchTable::IntValue(OpIdx) << MatchTable::LineBreak;
|
||||
}
|
||||
};
|
||||
|
||||
/// Generates code to check that an operand is a G_CONSTANT with a particular
|
||||
/// int.
|
||||
class ConstantIntOperandMatcher : public OperandPredicateMatcher {
|
||||
|
@ -3812,10 +3794,6 @@ Error GlobalISelEmitter::importChildMatcher(RuleMatcher &Rule,
|
|||
OM.addPredicate<MBBOperandMatcher>();
|
||||
return Error::success();
|
||||
}
|
||||
if (SrcChild->getOperator()->getName() == "timm") {
|
||||
OM.addPredicate<ImmOperandMatcher>();
|
||||
return Error::success();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3965,10 +3943,7 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
|
|||
// rendered as operands.
|
||||
// FIXME: The target should be able to choose sign-extended when appropriate
|
||||
// (e.g. on Mips).
|
||||
if (DstChild->getOperator()->getName() == "timm") {
|
||||
DstMIBuilder.addRenderer<CopyRenderer>(DstChild->getName());
|
||||
return InsertPt;
|
||||
} else if (DstChild->getOperator()->getName() == "imm") {
|
||||
if (DstChild->getOperator()->getName() == "imm") {
|
||||
DstMIBuilder.addRenderer<CopyConstantAsImmRenderer>(DstChild->getName());
|
||||
return InsertPt;
|
||||
} else if (DstChild->getOperator()->getName() == "fpimm") {
|
||||
|
|
Loading…
Reference in New Issue