Revert r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"

This broke the Chromium build, causing it to fail with e.g.

  fatal error: error in backend: Cannot select: t362: v4i32 = X86ISD::VSHLI t392, Constant:i8<15>

See llvm-commits thread of r372285 for details.

This also reverts r372286, r372287, r372288, r372289, r372290, r372291,
r372292, r372293, r372296, and r372297, which seemed to depend on the
main commit.

> Encode them directly as an imm argument to G_INTRINSIC*.
>
> Since intrinsics can now define which parameters are required to be
> immediates, avoid using registers for them. Intrinsics could
> potentially want a constant that isn't a legal register type. Also,
> since G_CONSTANT is subject to CSE and legalization, transforms could
> potentially obscure the value (and create extra work for the
> selector). The register bank of a G_CONSTANT is also meaningful, so
> this could throw off future folding and legalization logic for AMDGPU.
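As a minimal sketch of the contrast (the MachineIRBuilder MIB and the ConstantInt argument CI are assumed context, not code from this commit; the register path mirrors the IRTranslator hunk further down):

  // Direct encoding (the reverted r372285 behavior): the value is carried as
  // a plain immediate operand on the G_INTRINSIC*, so it cannot be CSE'd,
  // legalized, or assigned a register bank behind the selector's back.
  MIB.addImm(CI->getSExtValue());

  // Register encoding (the behavior this revert restores): the value is
  // materialized through a G_CONSTANT and the intrinsic takes a register use.
  ArrayRef<Register> VRegs = getOrCreateVRegs(*CI);
  MIB.addUse(VRegs[0]);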
>
> This will be much more convenient to work with than needing to call
> getConstantVRegVal and checking whether it failed for every constant
> intrinsic parameter. AMDGPU has quite a lot of intrinsics with
> immarg operands, many of which need inspection during lowering. Having
> to find the value in a register is going to add a lot of boilerplate
> and waste compile time.
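For example, a selector that needs the value of such an operand could read it as follows (a hedged sketch, not code from this patch; the operand index is hypothetical and the getConstantVRegVal signature is assumed to be the one in the tree at this point):

  #include "llvm/ADT/Optional.h"
  #include "llvm/CodeGen/GlobalISel/Utils.h"   // getConstantVRegVal
  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  using namespace llvm;

  // Immediate encoding: the value sits directly on the MachineOperand.
  int64_t readImmarg(const MachineInstr &MI, unsigned OpIdx) {
    return MI.getOperand(OpIdx).getImm();
  }

  // Register encoding: chase the vreg back to its constant def, which can
  // fail if earlier transforms have obscured the G_CONSTANT.
  Optional<int64_t> readImmargViaReg(const MachineInstr &MI, unsigned OpIdx,
                                     const MachineRegisterInfo &MRI) {
    return getConstantVRegVal(MI.getOperand(OpIdx).getReg(), MRI);
  }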
>
> SelectionDAG has always provided TargetConstant for constants which
> should not be legalized or materialized in a register. The distinction
> between Constant and TargetConstant was somewhat fuzzy, and there was
> no automatic way to force usage of TargetConstant for certain
> intrinsic parameters. Both were ultimately ConstantSDNode, and the
> distinction was applied inconsistently. It was quite easy to mis-select an
> instruction requiring an immediate. For SelectionDAG, start emitting
> TargetConstant for these arguments, and using timm to match them.
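In code, the SelectionDAG side amounts to the following when visitTargetIntrinsic builds the operand list (condensed from the SelectionDAGBuilder hunk further down; i is the argument index from the enclosing loop and Ops is the operand vector):

  const Value *Arg = I.getArgOperand(i);
  if (!I.paramHasAttr(i, Attribute::ImmArg)) {
    Ops.push_back(getValue(Arg));   // ordinary operand, subject to legalization
  } else {
    // immarg parameters become TargetConstant nodes, matched by timm patterns.
    EVT VT = TLI.getValueType(*DL, Arg->getType(), true);
    if (const auto *CI = dyn_cast<ConstantInt>(Arg))
      Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT));
    else
      Ops.push_back(DAG.getTargetConstantFP(*cast<ConstantFP>(Arg), SDLoc(), VT));
  }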
>
> Most of the work here is to cleanup target handling of constants. Some
> targets process intrinsics through intermediate custom nodes, which
> need to preserve TargetConstant usage to match the intrinsic
> expectation. Pattern inputs now need to distinguish whether a constant
> is merely compatible with an operand or whether it is mandatory.
>
> The GlobalISelEmitter needs to treat timm as a special case of a leaf
> node, similar to MachineBasicBlock operands. This should also enable
> handling of patterns for some G_* instructions with immediates, like
> G_FENCE or G_EXTRACT.
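Concretely, a timm leaf lowers to an operand check in the generated match table; the handler for that check is the GIM_CheckIsImm case removed again by this revert (condensed from the InstructionSelectorImpl hunk below, with the debug output elided):

  case GIM_CheckIsImm: {
    int64_t InsnID = MatchTable[CurrentIdx++];
    int64_t OpIdx = MatchTable[CurrentIdx++];
    // Reject the pattern unless the operand is a plain immediate.
    if (!State.MIs[InsnID]->getOperand(OpIdx).isImm()) {
      if (handleReject() == RejectAndGiveUp)
        return false;
    }
    break;
  }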
>
> This does include a workaround for a crash in GlobalISelEmitter when
> ARM tries to use "imm" in an output with a "timm" pattern source.

llvm-svn: 372314
commit 13bdae8541
parent 0cfb78e52a
Author: Hans Wennborg
Date:   2019-09-19 12:33:07 +00:00
79 changed files with 1364 additions and 5009 deletions

View File

@ -220,11 +220,6 @@ enum {
/// - OpIdx - Operand index /// - OpIdx - Operand index
GIM_CheckIsMBB, GIM_CheckIsMBB,
/// Check the specified operand is an Imm
/// - InsnID - Instruction ID
/// - OpIdx - Operand index
GIM_CheckIsImm,
/// Check if the specified operand is safe to fold into the current /// Check if the specified operand is safe to fold into the current
/// instruction. /// instruction.
/// - InsnID - Instruction ID /// - InsnID - Instruction ID

View File

@ -690,19 +690,7 @@ bool InstructionSelector::executeMatchTable(
} }
break; break;
} }
case GIM_CheckIsImm: {
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t OpIdx = MatchTable[CurrentIdx++];
DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
dbgs() << CurrentIdx << ": GIM_CheckIsImm(MIs[" << InsnID
<< "]->getOperand(" << OpIdx << "))\n");
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
if (!State.MIs[InsnID]->getOperand(OpIdx).isImm()) {
if (handleReject() == RejectAndGiveUp)
return false;
}
break;
}
case GIM_CheckIsSafeToFold: { case GIM_CheckIsSafeToFold: {
int64_t InsnID = MatchTable[CurrentIdx++]; int64_t InsnID = MatchTable[CurrentIdx++];
DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),

View File

@ -374,9 +374,6 @@ namespace llvm {
/// Returns a mask for which lanes get read/written by the given (register) /// Returns a mask for which lanes get read/written by the given (register)
/// machine operand. /// machine operand.
LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const; LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;
/// Returns true if the def register in \p MO has no uses.
bool deadDefHasNoUse(const MachineOperand &MO);
}; };
/// Creates a new SUnit and return a ptr to it. /// Creates a new SUnit and return a ptr to it.

View File

@ -848,11 +848,6 @@ class ImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm,
bit IsAPFloat = 0; bit IsAPFloat = 0;
} }
// Convenience wrapper for ImmLeaf to use timm/TargetConstant instead
// of imm/Constant.
class TImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm,
SDNode ImmNode = timm> : ImmLeaf<vt, pred, xform, ImmNode>;
// An ImmLeaf except that Imm is an APInt. This is useful when you need to // An ImmLeaf except that Imm is an APInt. This is useful when you need to
// zero-extend the immediate instead of sign-extend it. // zero-extend the immediate instead of sign-extend it.
// //

View File

@ -1617,29 +1617,14 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
if (isa<FPMathOperator>(CI)) if (isa<FPMathOperator>(CI))
MIB->copyIRFlags(CI); MIB->copyIRFlags(CI);
for (auto &Arg : enumerate(CI.arg_operands())) { for (auto &Arg : CI.arg_operands()) {
// Some intrinsics take metadata parameters. Reject them. // Some intrinsics take metadata parameters. Reject them.
if (isa<MetadataAsValue>(Arg.value())) if (isa<MetadataAsValue>(Arg))
return false; return false;
ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg);
// If this is required to be an immediate, don't materialize it in a if (VRegs.size() > 1)
// register. return false;
if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) { MIB.addUse(VRegs[0]);
if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg.value())) {
// imm arguments are more convenient than cimm (and realistically
// probably sufficient), so use them.
assert(CI->getBitWidth() <= 64 &&
"large intrinsic immediates not handled");
MIB.addImm(CI->getSExtValue());
} else {
MIB.addFPImm(cast<ConstantFP>(Arg.value()));
}
} else {
ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value());
if (VRegs.size() > 1)
return false;
MIB.addUse(VRegs[0]);
}
} }
// Add a MachineMemOperand if it is a target mem intrinsic. // Add a MachineMemOperand if it is a target mem intrinsic.

View File

@ -373,13 +373,6 @@ LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
return TRI->getSubRegIndexLaneMask(SubReg); return TRI->getSubRegIndexLaneMask(SubReg);
} }
bool ScheduleDAGInstrs::deadDefHasNoUse(const MachineOperand &MO) {
auto RegUse = CurrentVRegUses.find(MO.getReg());
if (RegUse == CurrentVRegUses.end())
return true;
return (RegUse->LaneMask & getLaneMaskForMO(MO)).none();
}
/// Adds register output and data dependencies from this SUnit to instructions /// Adds register output and data dependencies from this SUnit to instructions
/// that occur later in the same scheduling region if they read from or write to /// that occur later in the same scheduling region if they read from or write to
/// the virtual register defined at OperIdx. /// the virtual register defined at OperIdx.
@ -409,7 +402,8 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
} }
if (MO.isDead()) { if (MO.isDead()) {
assert(deadDefHasNoUse(MO) && "Dead defs should have no uses"); assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() &&
"Dead defs should have no uses");
} else { } else {
// Add data dependence to all uses we found so far. // Add data dependence to all uses we found so far.
const TargetSubtargetInfo &ST = MF.getSubtarget(); const TargetSubtargetInfo &ST = MF.getSubtarget();

View File

@ -4768,22 +4768,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// Add all operands of the call to the operand list. // Add all operands of the call to the operand list.
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
const Value *Arg = I.getArgOperand(i); SDValue Op = getValue(I.getArgOperand(i));
if (!I.paramHasAttr(i, Attribute::ImmArg)) { Ops.push_back(Op);
Ops.push_back(getValue(Arg));
continue;
}
// Use TargetConstant instead of a regular constant for immarg.
EVT VT = TLI.getValueType(*DL, Arg->getType(), true);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) {
assert(CI->getBitWidth() <= 64 &&
"large intrinsic immediates not handled");
Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT));
} else {
Ops.push_back(
DAG.getTargetConstantFP(*cast<ConstantFP>(Arg), SDLoc(), VT));
}
} }
SmallVector<EVT, 4> ValueVTs; SmallVector<EVT, 4> ValueVTs;

View File

@ -685,11 +685,11 @@ def logical_imm64_not : Operand<i64> {
// iXX_imm0_65535 predicates - True if the immediate is in the range [0,65535]. // iXX_imm0_65535 predicates - True if the immediate is in the range [0,65535].
let ParserMatchClass = AsmImmRange<0, 65535>, PrintMethod = "printImmHex" in { let ParserMatchClass = AsmImmRange<0, 65535>, PrintMethod = "printImmHex" in {
def i32_imm0_65535 : Operand<i32>, TImmLeaf<i32, [{ def i32_imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
return ((uint32_t)Imm) < 65536; return ((uint32_t)Imm) < 65536;
}]>; }]>;
def i64_imm0_65535 : Operand<i64>, TImmLeaf<i64, [{ def i64_imm0_65535 : Operand<i64>, ImmLeaf<i64, [{
return ((uint64_t)Imm) < 65536; return ((uint64_t)Imm) < 65536;
}]>; }]>;
} }

View File

@ -798,7 +798,7 @@ def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in { let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in {
def HWASAN_CHECK_MEMACCESS : Pseudo< def HWASAN_CHECK_MEMACCESS : Pseudo<
(outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
[(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 imm:$accessinfo))]>,
Sched<[]>; Sched<[]>;
} }

View File

@ -22,7 +22,6 @@
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBasicBlock.h"
@ -246,6 +245,10 @@ AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
} }
} }
static int64_t getConstant(const MachineInstr *MI) {
return MI->getOperand(1).getCImm()->getSExtValue();
}
static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) { static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
switch (Opc) { switch (Opc) {
case AMDGPU::G_AND: case AMDGPU::G_AND:
@ -734,260 +737,6 @@ buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
.addImm(Enabled); .addImm(Enabled);
} }
static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
int64_t C;
if (mi_match(Reg, MRI, m_ICst(C)) && C == 0)
return true;
// FIXME: matcher should ignore copies
return mi_match(Reg, MRI, m_Copy(m_ICst(C))) && C == 0;
}
static unsigned extractGLC(unsigned CachePolicy) {
return CachePolicy & 1;
}
static unsigned extractSLC(unsigned CachePolicy) {
return (CachePolicy >> 1) & 1;
}
static unsigned extractDLC(unsigned CachePolicy) {
return (CachePolicy >> 2) & 1;
}
// Returns Base register, constant offset, and offset def point.
static std::tuple<Register, unsigned, MachineInstr *>
getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
if (!Def)
return {Reg, 0, nullptr};
if (Def->getOpcode() == AMDGPU::G_CONSTANT) {
unsigned Offset;
const MachineOperand &Op = Def->getOperand(1);
if (Op.isImm())
Offset = Op.getImm();
else
Offset = Op.getCImm()->getZExtValue();
return {Register(), Offset, Def};
}
int64_t Offset;
if (Def->getOpcode() == AMDGPU::G_ADD) {
// TODO: Handle G_OR used for add case
if (mi_match(Def->getOperand(1).getReg(), MRI, m_ICst(Offset)))
return {Def->getOperand(0).getReg(), Offset, Def};
// FIXME: matcher should ignore copies
if (mi_match(Def->getOperand(1).getReg(), MRI, m_Copy(m_ICst(Offset))))
return {Def->getOperand(0).getReg(), Offset, Def};
}
return {Reg, 0, Def};
}
static unsigned getBufferStoreOpcode(LLT Ty,
const unsigned MemSize,
const bool Offen) {
const int Size = Ty.getSizeInBits();
switch (8 * MemSize) {
case 8:
return Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact :
AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact;
case 16:
return Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact :
AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact;
default:
unsigned Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact :
AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact;
if (Size > 32)
Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32);
return Opc;
}
}
static unsigned getBufferStoreFormatOpcode(LLT Ty,
const unsigned MemSize,
const bool Offen) {
bool IsD16Packed = Ty.getScalarSizeInBits() == 16;
bool IsD16Unpacked = 8 * MemSize < Ty.getSizeInBits();
int NumElts = Ty.isVector() ? Ty.getNumElements() : 1;
if (IsD16Packed) {
switch (NumElts) {
case 1:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact;
case 2:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFSET_exact;
case 3:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFSET_exact;
case 4:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFSET_exact;
default:
return -1;
}
}
if (IsD16Unpacked) {
switch (NumElts) {
case 1:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact;
case 2:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFSET_exact;
case 3:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFSET_exact;
case 4:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFSET_exact;
default:
return -1;
}
}
switch (NumElts) {
case 1:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_X_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_X_OFFSET_exact;
case 2:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XY_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_XY_OFFSET_exact;
case 3:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFSET_exact;
case 4:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFSET_exact;
default:
return -1;
}
llvm_unreachable("unhandled buffer store");
}
// TODO: Move this to combiner
// Returns base register, imm offset, total constant offset.
std::tuple<Register, unsigned, unsigned>
AMDGPUInstructionSelector::splitBufferOffsets(MachineIRBuilder &B,
Register OrigOffset) const {
const unsigned MaxImm = 4095;
Register BaseReg;
unsigned TotalConstOffset;
MachineInstr *OffsetDef;
MachineRegisterInfo &MRI = *B.getMRI();
std::tie(BaseReg, TotalConstOffset, OffsetDef)
= getBaseWithConstantOffset(MRI, OrigOffset);
unsigned ImmOffset = TotalConstOffset;
// If the immediate value is too big for the immoffset field, put the value
// and -4096 into the immoffset field so that the value that is copied/added
// for the voffset field is a multiple of 4096, and it stands more chance
// of being CSEd with the copy/add for another similar load/store.f
// However, do not do that rounding down to a multiple of 4096 if that is a
// negative number, as it appears to be illegal to have a negative offset
// in the vgpr, even if adding the immediate offset makes it positive.
unsigned Overflow = ImmOffset & ~MaxImm;
ImmOffset -= Overflow;
if ((int32_t)Overflow < 0) {
Overflow += ImmOffset;
ImmOffset = 0;
}
if (Overflow != 0) {
// In case this is in a waterfall loop, insert offset code at the def point
// of the offset, not inside the loop.
MachineBasicBlock::iterator OldInsPt = B.getInsertPt();
MachineBasicBlock &OldMBB = B.getMBB();
B.setInstr(*OffsetDef);
if (!BaseReg) {
BaseReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
B.buildInstr(AMDGPU::V_MOV_B32_e32)
.addDef(BaseReg)
.addImm(Overflow);
} else {
Register OverflowVal = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
B.buildInstr(AMDGPU::V_MOV_B32_e32)
.addDef(OverflowVal)
.addImm(Overflow);
Register NewBaseReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
TII.getAddNoCarry(B.getMBB(), B.getInsertPt(), B.getDebugLoc(), NewBaseReg)
.addReg(BaseReg)
.addReg(OverflowVal, RegState::Kill)
.addImm(0);
BaseReg = NewBaseReg;
}
B.setInsertPt(OldMBB, OldInsPt);
}
return {BaseReg, ImmOffset, TotalConstOffset};
}
bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr &MI,
bool IsFormat) const {
MachineIRBuilder B(MI);
MachineRegisterInfo &MRI = *B.getMRI();
MachineFunction &MF = B.getMF();
Register VData = MI.getOperand(1).getReg();
LLT Ty = MRI.getType(VData);
int Size = Ty.getSizeInBits();
if (Size % 32 != 0)
return false;
// FIXME: Verifier should enforce 1 MMO for these intrinsics.
MachineMemOperand *MMO = *MI.memoperands_begin();
const int MemSize = MMO->getSize();
Register RSrc = MI.getOperand(2).getReg();
Register VOffset = MI.getOperand(3).getReg();
Register SOffset = MI.getOperand(4).getReg();
unsigned CachePolicy = MI.getOperand(5).getImm();
unsigned ImmOffset;
unsigned TotalOffset;
std::tie(VOffset, ImmOffset, TotalOffset) = splitBufferOffsets(B, VOffset);
if (TotalOffset != 0)
MMO = MF.getMachineMemOperand(MMO, TotalOffset, MemSize);
const bool Offen = !isZero(VOffset, MRI);
int Opc = IsFormat ? getBufferStoreFormatOpcode(Ty, MemSize, Offen) :
getBufferStoreOpcode(Ty, MemSize, Offen);
if (Opc == -1)
return false;
MachineInstrBuilder MIB = B.buildInstr(Opc)
.addUse(VData);
if (Offen)
MIB.addUse(VOffset);
MIB.addUse(RSrc)
.addUse(SOffset)
.addImm(ImmOffset)
.addImm(extractGLC(CachePolicy))
.addImm(extractSLC(CachePolicy))
.addImm(0) // tfe: FIXME: Remove from inst
.addImm(extractDLC(CachePolicy))
.addMemOperand(MMO);
MI.eraseFromParent();
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
MachineInstr &I) const { MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent(); MachineBasicBlock *BB = I.getParent();
@ -997,10 +746,10 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
unsigned IntrinsicID = I.getIntrinsicID(); unsigned IntrinsicID = I.getIntrinsicID();
switch (IntrinsicID) { switch (IntrinsicID) {
case Intrinsic::amdgcn_exp: { case Intrinsic::amdgcn_exp: {
int64_t Tgt = I.getOperand(1).getImm(); int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
int64_t Enabled = I.getOperand(2).getImm(); int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
int64_t Done = I.getOperand(7).getImm(); int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
int64_t VM = I.getOperand(8).getImm(); int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));
MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(), MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
I.getOperand(4).getReg(), I.getOperand(4).getReg(),
@ -1013,13 +762,13 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
} }
case Intrinsic::amdgcn_exp_compr: { case Intrinsic::amdgcn_exp_compr: {
const DebugLoc &DL = I.getDebugLoc(); const DebugLoc &DL = I.getDebugLoc();
int64_t Tgt = I.getOperand(1).getImm(); int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
int64_t Enabled = I.getOperand(2).getImm(); int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
Register Reg0 = I.getOperand(3).getReg(); Register Reg0 = I.getOperand(3).getReg();
Register Reg1 = I.getOperand(4).getReg(); Register Reg1 = I.getOperand(4).getReg();
Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
int64_t Done = I.getOperand(5).getImm(); int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
int64_t VM = I.getOperand(6).getImm(); int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));
BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef); BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM, MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
@ -1042,10 +791,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
MRI.setRegClass(Reg, TRI.getWaveMaskRegClass()); MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
return true; return true;
} }
case Intrinsic::amdgcn_raw_buffer_store:
return selectStoreIntrinsic(I, false);
case Intrinsic::amdgcn_raw_buffer_store_format:
return selectStoreIntrinsic(I, true);
default: default:
return selectImpl(I, *CoverageInfo); return selectImpl(I, *CoverageInfo);
} }

View File

@ -35,7 +35,6 @@ class AMDGPUInstrInfo;
class AMDGPURegisterBankInfo; class AMDGPURegisterBankInfo;
class GCNSubtarget; class GCNSubtarget;
class MachineInstr; class MachineInstr;
class MachineIRBuilder;
class MachineOperand; class MachineOperand;
class MachineRegisterInfo; class MachineRegisterInfo;
class SIInstrInfo; class SIInstrInfo;
@ -83,12 +82,6 @@ private:
bool selectG_IMPLICIT_DEF(MachineInstr &I) const; bool selectG_IMPLICIT_DEF(MachineInstr &I) const;
bool selectG_INSERT(MachineInstr &I) const; bool selectG_INSERT(MachineInstr &I) const;
bool selectG_INTRINSIC(MachineInstr &I) const; bool selectG_INTRINSIC(MachineInstr &I) const;
std::tuple<Register, unsigned, unsigned>
splitBufferOffsets(MachineIRBuilder &B, Register OrigOffset) const;
bool selectStoreIntrinsic(MachineInstr &MI, bool IsFormat) const;
bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const; bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const;
int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const; int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const;
bool selectG_ICMP(MachineInstr &I) const; bool selectG_ICMP(MachineInstr &I) const;

View File

@ -1751,62 +1751,6 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI,
return true; return true;
} }
/// Handle register layout difference for f16 images for some subtargets.
Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
MachineRegisterInfo &MRI,
Register Reg) const {
if (!ST.hasUnpackedD16VMem())
return Reg;
const LLT S16 = LLT::scalar(16);
const LLT S32 = LLT::scalar(32);
LLT StoreVT = MRI.getType(Reg);
assert(StoreVT.isVector() && StoreVT.getElementType() == S16);
auto Unmerge = B.buildUnmerge(S16, Reg);
SmallVector<Register, 4> WideRegs;
for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
WideRegs.push_back(B.buildAnyExt(S32, Unmerge.getReg(I)).getReg(0));
int NumElts = StoreVT.getNumElements();
return B.buildBuildVector(LLT::vector(NumElts, S32), WideRegs).getReg(0);
}
bool AMDGPULegalizerInfo::legalizeRawBufferStore(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B,
bool IsFormat) const {
// TODO: Reject f16 format on targets where unsupported.
Register VData = MI.getOperand(1).getReg();
LLT Ty = MRI.getType(VData);
B.setInstr(MI);
const LLT S32 = LLT::scalar(32);
const LLT S16 = LLT::scalar(16);
// Fixup illegal register types for i8 stores.
if (Ty == LLT::scalar(8) || Ty == S16) {
Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0);
MI.getOperand(1).setReg(AnyExt);
return true;
}
if (Ty.isVector()) {
if (Ty.getElementType() == S16 && Ty.getNumElements() <= 4) {
if (IsFormat)
MI.getOperand(1).setReg(handleD16VData(B, MRI, VData));
return true;
}
return Ty.getElementType() == S32 && Ty.getNumElements() <= 4;
}
return Ty == S32;
}
bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
MachineRegisterInfo &MRI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const { MachineIRBuilder &B) const {
@ -1899,10 +1843,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
MI.eraseFromParent(); MI.eraseFromParent();
return true; return true;
} }
case Intrinsic::amdgcn_raw_buffer_store:
return legalizeRawBufferStore(MI, MRI, B, false);
case Intrinsic::amdgcn_raw_buffer_store_format:
return legalizeRawBufferStore(MI, MRI, B, true);
default: default:
return true; return true;
} }

View File

@ -83,11 +83,6 @@ public:
MachineIRBuilder &B) const; MachineIRBuilder &B) const;
bool legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI, bool legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B, unsigned AddrSpace) const; MachineIRBuilder &B, unsigned AddrSpace) const;
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
Register Reg) const;
bool legalizeRawBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B, bool IsFormat) const;
bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const override; MachineIRBuilder &B) const override;

View File

@ -20,7 +20,6 @@
#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h"
@ -34,7 +33,6 @@
#include "AMDGPUGenRegisterBankInfo.def" #include "AMDGPUGenRegisterBankInfo.def"
using namespace llvm; using namespace llvm;
using namespace MIPatternMatch;
namespace { namespace {
@ -86,11 +84,9 @@ public:
}; };
} }
AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const GCNSubtarget &ST) AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
: AMDGPUGenRegisterBankInfo(), : AMDGPUGenRegisterBankInfo(),
Subtarget(ST), TRI(static_cast<const SIRegisterInfo*>(&TRI)) {
TRI(Subtarget.getRegisterInfo()),
TII(Subtarget.getInstrInfo()) {
// HACK: Until this is fully tablegen'd. // HACK: Until this is fully tablegen'd.
static bool AlreadyInit = false; static bool AlreadyInit = false;
@ -642,10 +638,8 @@ static LLT getHalfSizedType(LLT Ty) {
/// ///
/// There is additional complexity to try for compare values to identify the /// There is additional complexity to try for compare values to identify the
/// unique values used. /// unique values used.
bool AMDGPURegisterBankInfo::executeInWaterfallLoop( void AMDGPURegisterBankInfo::executeInWaterfallLoop(
MachineIRBuilder &B, MachineInstr &MI, MachineRegisterInfo &MRI,
MachineInstr &MI,
MachineRegisterInfo &MRI,
ArrayRef<unsigned> OpIndices) const { ArrayRef<unsigned> OpIndices) const {
MachineFunction *MF = MI.getParent()->getParent(); MachineFunction *MF = MI.getParent()->getParent();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
@ -668,8 +662,9 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
// No operands need to be replaced, so no need to loop. // No operands need to be replaced, so no need to loop.
if (SGPROperandRegs.empty()) if (SGPROperandRegs.empty())
return false; return;
MachineIRBuilder B(MI);
SmallVector<Register, 4> ResultRegs; SmallVector<Register, 4> ResultRegs;
SmallVector<Register, 4> InitResultRegs; SmallVector<Register, 4> InitResultRegs;
SmallVector<Register, 4> PhiRegs; SmallVector<Register, 4> PhiRegs;
@ -927,18 +922,6 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
B.buildInstr(AMDGPU::S_MOV_B64_term) B.buildInstr(AMDGPU::S_MOV_B64_term)
.addDef(AMDGPU::EXEC) .addDef(AMDGPU::EXEC)
.addReg(SaveExecReg); .addReg(SaveExecReg);
// Restore the insert point before the original instruction.
B.setInsertPt(MBB, MBB.end());
return true;
}
bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
MachineInstr &MI, MachineRegisterInfo &MRI,
ArrayRef<unsigned> OpIndices) const {
MachineIRBuilder B(MI);
return executeInWaterfallLoop(B, MI, MRI, OpIndices);
} }
// Legalize an operand that must be an SGPR by inserting a readfirstlane. // Legalize an operand that must be an SGPR by inserting a readfirstlane.
@ -1048,33 +1031,6 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI,
return true; return true;
} }
bool AMDGPURegisterBankInfo::applyMappingImage(
MachineInstr &MI, const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
MachineRegisterInfo &MRI, int RsrcIdx) const {
const int NumDefs = MI.getNumExplicitDefs();
// The reported argument index is relative to the IR intrinsic call arguments,
// so we need to shift by the number of defs and the intrinsic ID.
RsrcIdx += NumDefs + 1;
// Insert copies to VGPR arguments.
applyDefaultMapping(OpdMapper);
// Fixup any SGPR arguments.
SmallVector<unsigned, 4> SGPRIndexes;
for (int I = NumDefs, NumOps = MI.getNumOperands(); I != NumOps; ++I) {
if (!MI.getOperand(I).isReg())
continue;
// If this intrinsic has a sampler, it immediately follows rsrc.
if (I == RsrcIdx || I == RsrcIdx + 1)
SGPRIndexes.push_back(I);
}
executeInWaterfallLoop(MI, MRI, SGPRIndexes);
return true;
}
// For cases where only a single copy is inserted for matching register banks. // For cases where only a single copy is inserted for matching register banks.
// Replace the register in the instruction operand // Replace the register in the instruction operand
static void substituteSimpleCopyRegs( static void substituteSimpleCopyRegs(
@ -1086,184 +1042,6 @@ static void substituteSimpleCopyRegs(
} }
} }
/// Handle register layout difference for f16 images for some subtargets.
Register AMDGPURegisterBankInfo::handleD16VData(MachineIRBuilder &B,
MachineRegisterInfo &MRI,
Register Reg) const {
if (!Subtarget.hasUnpackedD16VMem())
return Reg;
const LLT S16 = LLT::scalar(16);
LLT StoreVT = MRI.getType(Reg);
if (!StoreVT.isVector() || StoreVT.getElementType() != S16)
return Reg;
auto Unmerge = B.buildUnmerge(S16, Reg);
SmallVector<Register, 4> WideRegs;
for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
WideRegs.push_back(Unmerge.getReg(I));
const LLT S32 = LLT::scalar(32);
int NumElts = StoreVT.getNumElements();
return B.buildMerge(LLT::vector(NumElts, S32), WideRegs).getReg(0);
}
static std::pair<Register, unsigned>
getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
int64_t Const;
if (mi_match(Reg, MRI, m_ICst(Const)))
return std::make_pair(Register(), Const);
Register Base;
if (mi_match(Reg, MRI, m_GAdd(m_Reg(Base), m_ICst(Const))))
return std::make_pair(Base, Const);
// TODO: Handle G_OR used for add case
return std::make_pair(Reg, 0);
}
std::pair<Register, unsigned>
AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
Register OrigOffset) const {
const unsigned MaxImm = 4095;
Register BaseReg;
unsigned ImmOffset;
const LLT S32 = LLT::scalar(32);
std::tie(BaseReg, ImmOffset) = getBaseWithConstantOffset(*B.getMRI(),
OrigOffset);
unsigned C1 = 0;
if (ImmOffset != 0) {
// If the immediate value is too big for the immoffset field, put the value
// and -4096 into the immoffset field so that the value that is copied/added
// for the voffset field is a multiple of 4096, and it stands more chance
// of being CSEd with the copy/add for another similar load/store.
// However, do not do that rounding down to a multiple of 4096 if that is a
// negative number, as it appears to be illegal to have a negative offset
// in the vgpr, even if adding the immediate offset makes it positive.
unsigned Overflow = ImmOffset & ~MaxImm;
ImmOffset -= Overflow;
if ((int32_t)Overflow < 0) {
Overflow += ImmOffset;
ImmOffset = 0;
}
C1 = ImmOffset;
if (Overflow != 0) {
if (!BaseReg)
BaseReg = B.buildConstant(S32, Overflow).getReg(0);
else {
auto OverflowVal = B.buildConstant(S32, Overflow);
BaseReg = B.buildAdd(S32, BaseReg, OverflowVal).getReg(0);
}
}
}
if (!BaseReg)
BaseReg = B.buildConstant(S32, 0).getReg(0);
return {BaseReg, C1};
}
static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
int64_t C;
return mi_match(Reg, MRI, m_ICst(C)) && C == 0;
}
static unsigned extractGLC(unsigned CachePolicy) {
return CachePolicy & 1;
}
static unsigned extractSLC(unsigned CachePolicy) {
return (CachePolicy >> 1) & 1;
}
static unsigned extractDLC(unsigned CachePolicy) {
return (CachePolicy >> 2) & 1;
}
MachineInstr *
AMDGPURegisterBankInfo::selectStoreIntrinsic(MachineIRBuilder &B,
MachineInstr &MI) const {
MachineRegisterInfo &MRI = *B.getMRI();
executeInWaterfallLoop(B, MI, MRI, {2, 4});
// FIXME: DAG lowering brokenly changes opcode based on FP vs. integer.
Register VData = MI.getOperand(1).getReg();
LLT Ty = MRI.getType(VData);
int EltSize = Ty.getScalarSizeInBits();
int Size = Ty.getSizeInBits();
// FIXME: Broken integer truncstore.
if (EltSize != 32)
report_fatal_error("unhandled intrinsic store");
// FIXME: Verifier should enforce 1 MMO for these intrinsics.
const int MemSize = (*MI.memoperands_begin())->getSize();
Register RSrc = MI.getOperand(2).getReg();
Register VOffset = MI.getOperand(3).getReg();
Register SOffset = MI.getOperand(4).getReg();
unsigned CachePolicy = MI.getOperand(5).getImm();
unsigned ImmOffset;
std::tie(VOffset, ImmOffset) = splitBufferOffsets(B, VOffset);
const bool Offen = !isZero(VOffset, MRI);
unsigned Opc = AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact;
switch (8 * MemSize) {
case 8:
Opc = Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact :
AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact;
break;
case 16:
Opc = Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact :
AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact;
break;
default:
Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact :
AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact;
if (Size > 32)
Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32);
break;
}
// Set the insertion point back to the instruction in case it was moved into a
// loop.
B.setInstr(MI);
MachineInstrBuilder MIB = B.buildInstr(Opc)
.addUse(VData);
if (Offen)
MIB.addUse(VOffset);
MIB.addUse(RSrc)
.addUse(SOffset)
.addImm(ImmOffset)
.addImm(extractGLC(CachePolicy))
.addImm(extractSLC(CachePolicy))
.addImm(0) // tfe: FIXME: Remove from inst
.addImm(extractDLC(CachePolicy))
.cloneMemRefs(MI);
// FIXME: We need a way to report failure from applyMappingImpl.
// Insert constrain copies before inserting the loop.
if (!constrainSelectedInstRegOperands(*MIB, *TII, *TRI, *this))
report_fatal_error("failed to constrain selected store intrinsic");
return MIB;
}
void AMDGPURegisterBankInfo::applyMappingImpl( void AMDGPURegisterBankInfo::applyMappingImpl(
const OperandsMapper &OpdMapper) const { const OperandsMapper &OpdMapper) const {
MachineInstr &MI = OpdMapper.getMI(); MachineInstr &MI = OpdMapper.getMI();
@ -1627,8 +1405,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
break; break;
} }
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: { case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
auto IntrID = MI.getIntrinsicID(); switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
switch (IntrID) {
case Intrinsic::amdgcn_buffer_load: { case Intrinsic::amdgcn_buffer_load: {
executeInWaterfallLoop(MI, MRI, { 2 }); executeInWaterfallLoop(MI, MRI, { 2 });
return; return;
@ -1647,39 +1424,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
constrainOpWithReadfirstlane(MI, MRI, 2); // M0 constrainOpWithReadfirstlane(MI, MRI, 2); // M0
return; return;
} }
case Intrinsic::amdgcn_raw_buffer_load: default:
case Intrinsic::amdgcn_raw_buffer_load_format:
case Intrinsic::amdgcn_raw_tbuffer_load:
case Intrinsic::amdgcn_raw_buffer_store:
case Intrinsic::amdgcn_raw_buffer_store_format:
case Intrinsic::amdgcn_raw_tbuffer_store: {
applyDefaultMapping(OpdMapper);
executeInWaterfallLoop(MI, MRI, {2, 4});
return;
}
case Intrinsic::amdgcn_struct_buffer_load:
case Intrinsic::amdgcn_struct_buffer_store:
case Intrinsic::amdgcn_struct_tbuffer_load:
case Intrinsic::amdgcn_struct_tbuffer_store: {
applyDefaultMapping(OpdMapper);
executeInWaterfallLoop(MI, MRI, {2, 5});
return;
}
default: {
if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
AMDGPU::lookupRsrcIntrinsic(IntrID)) {
// Non-images can have complications from operands that allow both SGPR
// and VGPR. For now it's too complicated to figure out the final opcode
// to derive the register bank from the MCInstrDesc.
if (RSrcIntrin->IsImage) {
applyMappingImage(MI, OpdMapper, MRI, RSrcIntrin->RsrcArg);
return;
}
}
break; break;
} }
}
break; break;
} }
case AMDGPU::G_LOAD: case AMDGPU::G_LOAD:
@ -1784,45 +1531,6 @@ AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const {
MI.getNumOperands()); MI.getNumOperands());
} }
const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getImageMapping(const MachineRegisterInfo &MRI,
const MachineInstr &MI,
int RsrcIdx) const {
// The reported argument index is relative to the IR intrinsic call arguments,
// so we need to shift by the number of defs and the intrinsic ID.
RsrcIdx += MI.getNumExplicitDefs() + 1;
const int NumOps = MI.getNumOperands();
SmallVector<const ValueMapping *, 8> OpdsMapping(NumOps);
// TODO: Should packed/unpacked D16 difference be reported here as part of
// the value mapping?
for (int I = 0; I != NumOps; ++I) {
if (!MI.getOperand(I).isReg())
continue;
Register OpReg = MI.getOperand(I).getReg();
unsigned Size = getSizeInBits(OpReg, MRI, *TRI);
// FIXME: Probably need a new intrinsic register bank searchable table to
// handle arbitrary intrinsics easily.
//
// If this has a sampler, it immediately follows rsrc.
const bool MustBeSGPR = I == RsrcIdx || I == RsrcIdx + 1;
if (MustBeSGPR) {
// If this must be an SGPR, so we must report whatever it is as legal.
unsigned NewBank = getRegBankID(OpReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
OpdsMapping[I] = AMDGPU::getValueMapping(NewBank, Size);
} else {
// Some operands must be VGPR, and these are easy to copy to.
OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
}
}
return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), NumOps);
}
const RegisterBankInfo::InstructionMapping & const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const { AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
@ -1869,31 +1577,11 @@ AMDGPURegisterBankInfo::getRegBankID(Register Reg,
return Bank ? Bank->getID() : Default; return Bank ? Bank->getID() : Default;
} }
static unsigned regBankUnion(unsigned RB0, unsigned RB1) { static unsigned regBankUnion(unsigned RB0, unsigned RB1) {
return (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID) ? return (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID) ?
AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
} }
const RegisterBankInfo::ValueMapping *
AMDGPURegisterBankInfo::getSGPROpMapping(Register Reg,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
// Lie and claim anything is legal, even though this needs to be an SGPR
// applyMapping will have to deal with it as a waterfall loop.
unsigned Bank = getRegBankID(Reg, MRI, TRI, AMDGPU::SGPRRegBankID);
unsigned Size = getSizeInBits(Reg, MRI, TRI);
return AMDGPU::getValueMapping(Bank, Size);
}
const RegisterBankInfo::ValueMapping *
AMDGPURegisterBankInfo::getVGPROpMapping(Register Reg,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
unsigned Size = getSizeInBits(Reg, MRI, TRI);
return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
}
/// ///
/// This function must return a legal mapping, because /// This function must return a legal mapping, because
/// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called /// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called
@ -2060,6 +1748,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
LLVM_FALLTHROUGH; LLVM_FALLTHROUGH;
} }
case AMDGPU::G_GEP: case AMDGPU::G_GEP:
case AMDGPU::G_ADD: case AMDGPU::G_ADD:
case AMDGPU::G_SUB: case AMDGPU::G_SUB:
@ -2075,6 +1764,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_SADDE: case AMDGPU::G_SADDE:
case AMDGPU::G_USUBE: case AMDGPU::G_USUBE:
case AMDGPU::G_SSUBE: case AMDGPU::G_SSUBE:
case AMDGPU::G_UMULH:
case AMDGPU::G_SMULH:
case AMDGPU::G_SMIN: case AMDGPU::G_SMIN:
case AMDGPU::G_SMAX: case AMDGPU::G_SMAX:
case AMDGPU::G_UMIN: case AMDGPU::G_UMIN:
@ -2108,13 +1799,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_INTRINSIC_TRUNC: case AMDGPU::G_INTRINSIC_TRUNC:
case AMDGPU::G_INTRINSIC_ROUND: case AMDGPU::G_INTRINSIC_ROUND:
return getDefaultMappingVOP(MI); return getDefaultMappingVOP(MI);
case AMDGPU::G_UMULH:
case AMDGPU::G_SMULH: {
if (MF.getSubtarget<GCNSubtarget>().hasScalarMulHiInsts() &&
isSALUMapping(MI))
return getDefaultMappingSOP(MI);
return getDefaultMappingVOP(MI);
}
case AMDGPU::G_IMPLICIT_DEF: { case AMDGPU::G_IMPLICIT_DEF: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
@ -2388,7 +2072,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_wwm: case Intrinsic::amdgcn_wwm:
case Intrinsic::amdgcn_wqm: case Intrinsic::amdgcn_wqm:
return getDefaultMappingVOP(MI); return getDefaultMappingVOP(MI);
case Intrinsic::amdgcn_ds_swizzle:
case Intrinsic::amdgcn_ds_permute: case Intrinsic::amdgcn_ds_permute:
case Intrinsic::amdgcn_ds_bpermute: case Intrinsic::amdgcn_ds_bpermute:
case Intrinsic::amdgcn_update_dpp: case Intrinsic::amdgcn_update_dpp:
@ -2510,8 +2193,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break; break;
} }
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: { case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
auto IntrID = MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID(); switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
switch (IntrID) { default:
return getInvalidInstructionMapping();
case Intrinsic::amdgcn_s_getreg: case Intrinsic::amdgcn_s_getreg:
case Intrinsic::amdgcn_s_memtime: case Intrinsic::amdgcn_s_memtime:
case Intrinsic::amdgcn_s_memrealtime: case Intrinsic::amdgcn_s_memrealtime:
@ -2551,11 +2235,18 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
break; break;
case Intrinsic::amdgcn_exp: case Intrinsic::amdgcn_exp:
OpdsMapping[0] = nullptr; // IntrinsicID
// FIXME: These are immediate values which can't be read from registers.
OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
// FIXME: Could we support packed types here? // FIXME: Could we support packed types here?
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
// FIXME: These are immediate values which can't be read from registers.
OpdsMapping[7] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
OpdsMapping[8] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
break; break;
case Intrinsic::amdgcn_buffer_load: { case Intrinsic::amdgcn_buffer_load: {
Register RSrc = MI.getOperand(2).getReg(); // SGPR Register RSrc = MI.getOperand(2).getReg(); // SGPR
@ -2607,54 +2298,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1); OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
break; break;
} }
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_tbuffer_load: {
// FIXME: Should make intrinsic ID the last operand of the instruction,
// then this would be the same as store
OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
break;
}
case Intrinsic::amdgcn_raw_buffer_store:
case Intrinsic::amdgcn_raw_buffer_store_format:
case Intrinsic::amdgcn_raw_tbuffer_store: {
OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
break;
}
case Intrinsic::amdgcn_struct_buffer_load:
case Intrinsic::amdgcn_struct_tbuffer_load: {
OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
break;
}
case Intrinsic::amdgcn_struct_buffer_store:
case Intrinsic::amdgcn_struct_tbuffer_store: {
OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
break;
}
default:
if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
AMDGPU::lookupRsrcIntrinsic(IntrID)) {
// Non-images can have complications from operands that allow both SGPR
// and VGPR. For now it's too complicated to figure out the final opcode
// to derive the register bank from the MCInstrDesc.
if (RSrcIntrin->IsImage)
return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg);
}
return getInvalidInstructionMapping();
} }
break; break;
} }

View File

@ -23,9 +23,7 @@
namespace llvm { namespace llvm {
class LLT; class LLT;
class GCNSubtarget;
class MachineIRBuilder; class MachineIRBuilder;
class SIInstrInfo;
class SIRegisterInfo; class SIRegisterInfo;
class TargetRegisterInfo; class TargetRegisterInfo;
@ -38,15 +36,9 @@ protected:
#include "AMDGPUGenRegisterBank.inc" #include "AMDGPUGenRegisterBank.inc"
}; };
class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
const GCNSubtarget &Subtarget;
const SIRegisterInfo *TRI; const SIRegisterInfo *TRI;
const SIInstrInfo *TII;
bool executeInWaterfallLoop(MachineIRBuilder &B, void executeInWaterfallLoop(MachineInstr &MI,
MachineInstr &MI,
MachineRegisterInfo &MRI,
ArrayRef<unsigned> OpIndices) const;
bool executeInWaterfallLoop(MachineInstr &MI,
MachineRegisterInfo &MRI, MachineRegisterInfo &MRI,
ArrayRef<unsigned> OpIndices) const; ArrayRef<unsigned> OpIndices) const;
@ -55,19 +47,6 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
bool applyMappingWideLoad(MachineInstr &MI, bool applyMappingWideLoad(MachineInstr &MI,
const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
MachineRegisterInfo &MRI) const; MachineRegisterInfo &MRI) const;
bool
applyMappingImage(MachineInstr &MI,
const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
MachineRegisterInfo &MRI, int RSrcIdx) const;
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
Register Reg) const;
std::pair<Register, unsigned>
splitBufferOffsets(MachineIRBuilder &B, Register Offset) const;
MachineInstr *selectStoreIntrinsic(MachineIRBuilder &B,
MachineInstr &MI) const;
/// See RegisterBankInfo::applyMapping. /// See RegisterBankInfo::applyMapping.
void applyMappingImpl(const OperandsMapper &OpdMapper) const override; void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
@ -79,16 +58,6 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
const TargetRegisterInfo &TRI, const TargetRegisterInfo &TRI,
unsigned Default = AMDGPU::VGPRRegBankID) const; unsigned Default = AMDGPU::VGPRRegBankID) const;
// Return a value mapping for an operand that is required to be an SGPR.
const ValueMapping *getSGPROpMapping(Register Reg,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const;
// Return a value mapping for an operand that is required to be a VGPR.
const ValueMapping *getVGPROpMapping(Register Reg,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const;
/// Split 64-bit value \p Reg into two 32-bit halves and populate them into \p /// Split 64-bit value \p Reg into two 32-bit halves and populate them into \p
/// Regs. This appropriately sets the regbank of the new registers. /// Regs. This appropriately sets the regbank of the new registers.
void split64BitValueForMapping(MachineIRBuilder &B, void split64BitValueForMapping(MachineIRBuilder &B,
@ -121,13 +90,8 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const; const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const;
const InstructionMapping &getDefaultMappingAllVGPR( const InstructionMapping &getDefaultMappingAllVGPR(
const MachineInstr &MI) const; const MachineInstr &MI) const;
const InstructionMapping &getImageMapping(const MachineRegisterInfo &MRI,
const MachineInstr &MI,
int RsrcIdx) const;
public: public:
AMDGPURegisterBankInfo(const GCNSubtarget &STI); AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI);
unsigned copyCost(const RegisterBank &A, const RegisterBank &B, unsigned copyCost(const RegisterBank &A, const RegisterBank &B,
unsigned Size) const override; unsigned Size) const override;

View File

@ -283,7 +283,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(this); MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(this);
CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering())); CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
Legalizer.reset(new AMDGPULegalizerInfo(*this, TM)); Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
RegBankInfo.reset(new AMDGPURegisterBankInfo(*this)); RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
InstSelector.reset(new AMDGPUInstructionSelector( InstSelector.reset(new AMDGPUInstructionSelector(
*this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM)); *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
} }

View File

@ -555,10 +555,6 @@ public:
return GFX9Insts; return GFX9Insts;
} }
bool hasScalarMulHiInsts() const {
return GFX9Insts;
}
TrapHandlerAbi getTrapHandlerAbi() const { TrapHandlerAbi getTrapHandlerAbi() const {
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone; return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
} }

View File

@ -1135,29 +1135,29 @@ def extract_dlc : SDNodeXForm<imm, [{
multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt, multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> { string opcode> {
def : GCNPat< def : GCNPat<
(vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
timm:$cachepolicy, 0)), imm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset), (!cast<MUBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>; >;
def : GCNPat< def : GCNPat<
(vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
timm:$cachepolicy, 0)), imm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset), (!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>; >;
def : GCNPat< def : GCNPat<
(vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
timm:$cachepolicy, timm)), imm:$cachepolicy, imm)),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset), (!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>; >;
def : GCNPat< def : GCNPat<
(vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
timm:$cachepolicy, timm)), imm:$cachepolicy, imm)),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN) (!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset), $rsrc, $soffset, (as_i16imm $offset),
@ -1210,31 +1210,31 @@ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ushort, i32, "BUFFER_LOAD_USHORT">;
multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> { string opcode> {
def : GCNPat< def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
timm:$cachepolicy, 0), imm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset), (!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>; >;
def : GCNPat< def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
timm:$cachepolicy, 0), imm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset, (!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
(as_i16imm $offset), (extract_glc $cachepolicy), (as_i16imm $offset), (extract_glc $cachepolicy),
(extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>; >;
def : GCNPat< def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
timm:$cachepolicy, timm), imm:$cachepolicy, imm),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset, (!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
(as_i16imm $offset), (extract_glc $cachepolicy), (as_i16imm $offset), (extract_glc $cachepolicy),
(extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>; >;
def : GCNPat< def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
timm:$cachepolicy, timm), imm:$cachepolicy, imm),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact) (!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
$vdata, $vdata,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
@@ -1291,32 +1291,32 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
string opcode> { string opcode> {
def : GCNPat< def : GCNPat<
(vt (name vt:$vdata_in, v4i32:$rsrc, 0, (vt (name vt:$vdata_in, v4i32:$rsrc, 0,
0, i32:$soffset, timm:$offset, 0, i32:$soffset, imm:$offset,
timm:$cachepolicy, 0)), imm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN) $vdata_in, $rsrc, $soffset, (!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN) $vdata_in, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy)) (as_i16imm $offset), (extract_slc $cachepolicy))
>; >;
def : GCNPat< def : GCNPat<
(vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
0, i32:$soffset, timm:$offset, 0, i32:$soffset, imm:$offset,
timm:$cachepolicy, timm)), imm:$cachepolicy, imm)),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) $vdata_in, $vindex, $rsrc, $soffset, (!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) $vdata_in, $vindex, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy)) (as_i16imm $offset), (extract_slc $cachepolicy))
>; >;
def : GCNPat< def : GCNPat<
(vt (name vt:$vdata_in, v4i32:$rsrc, 0, (vt (name vt:$vdata_in, v4i32:$rsrc, 0,
i32:$voffset, i32:$soffset, timm:$offset, i32:$voffset, i32:$soffset, imm:$offset,
timm:$cachepolicy, 0)), imm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) $vdata_in, $voffset, $rsrc, $soffset, (!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) $vdata_in, $voffset, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy)) (as_i16imm $offset), (extract_slc $cachepolicy))
>; >;
def : GCNPat< def : GCNPat<
(vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
i32:$voffset, i32:$soffset, timm:$offset, i32:$voffset, i32:$soffset, imm:$offset,
timm:$cachepolicy, timm)), imm:$cachepolicy, imm)),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_RTN) (!cast<MUBUF_Pseudo>(opcode # _BOTHEN_RTN)
$vdata_in, $vdata_in,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
@@ -1353,32 +1353,32 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
string opcode> { string opcode> {
def : GCNPat< def : GCNPat<
(name vt:$vdata_in, v4i32:$rsrc, 0, (name vt:$vdata_in, v4i32:$rsrc, 0,
0, i32:$soffset, timm:$offset, 0, i32:$soffset, imm:$offset,
timm:$cachepolicy, 0), imm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) $vdata_in, $rsrc, $soffset, (!cast<MUBUF_Pseudo>(opcode # _OFFSET) $vdata_in, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy)) (as_i16imm $offset), (extract_slc $cachepolicy))
>; >;
def : GCNPat< def : GCNPat<
(name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
0, i32:$soffset, timm:$offset, 0, i32:$soffset, imm:$offset,
timm:$cachepolicy, timm), imm:$cachepolicy, imm),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vdata_in, $vindex, $rsrc, $soffset, (!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vdata_in, $vindex, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy)) (as_i16imm $offset), (extract_slc $cachepolicy))
>; >;
def : GCNPat< def : GCNPat<
(name vt:$vdata_in, v4i32:$rsrc, 0, (name vt:$vdata_in, v4i32:$rsrc, 0,
i32:$voffset, i32:$soffset, timm:$offset, i32:$voffset, i32:$soffset, imm:$offset,
timm:$cachepolicy, 0), imm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) $vdata_in, $voffset, $rsrc, $soffset, (!cast<MUBUF_Pseudo>(opcode # _OFFEN) $vdata_in, $voffset, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy)) (as_i16imm $offset), (extract_slc $cachepolicy))
>; >;
def : GCNPat< def : GCNPat<
(name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
i32:$voffset, i32:$soffset, timm:$offset, i32:$voffset, i32:$soffset, imm:$offset,
timm:$cachepolicy, timm), imm:$cachepolicy, imm),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN) (!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
$vdata_in, $vdata_in,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
@@ -1392,8 +1392,8 @@ defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_pk_fadd, v2f16, "BUFFER_ATOMI
def : GCNPat< def : GCNPat<
(SIbuffer_atomic_cmpswap (SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, 0, i32:$data, i32:$cmp, v4i32:$rsrc, 0,
0, i32:$soffset, timm:$offset, 0, i32:$soffset, imm:$offset,
timm:$cachepolicy, 0), imm:$cachepolicy, 0),
(EXTRACT_SUBREG (EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN (BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
@@ -1404,8 +1404,8 @@ def : GCNPat<
def : GCNPat< def : GCNPat<
(SIbuffer_atomic_cmpswap (SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
0, i32:$soffset, timm:$offset, 0, i32:$soffset, imm:$offset,
timm:$cachepolicy, timm), imm:$cachepolicy, imm),
(EXTRACT_SUBREG (EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_IDXEN_RTN (BUFFER_ATOMIC_CMPSWAP_IDXEN_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
@@ -1416,8 +1416,8 @@ def : GCNPat<
def : GCNPat< def : GCNPat<
(SIbuffer_atomic_cmpswap (SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, 0, i32:$data, i32:$cmp, v4i32:$rsrc, 0,
i32:$voffset, i32:$soffset, timm:$offset, i32:$voffset, i32:$soffset, imm:$offset,
timm:$cachepolicy, 0), imm:$cachepolicy, 0),
(EXTRACT_SUBREG (EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN (BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
@@ -1428,8 +1428,8 @@ def : GCNPat<
def : GCNPat< def : GCNPat<
(SIbuffer_atomic_cmpswap (SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
i32:$voffset, i32:$soffset, timm:$offset, i32:$voffset, i32:$soffset, imm:$offset,
timm:$cachepolicy, timm), imm:$cachepolicy, imm),
(EXTRACT_SUBREG (EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN (BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
@@ -1642,32 +1642,32 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_D16_HI_OFFEN, BUFFER_STORE_BYTE_D
multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt, multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> { string opcode> {
def : GCNPat< def : GCNPat<
(vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
timm:$format, timm:$cachepolicy, 0)), imm:$format, imm:$cachepolicy, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset), (!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
(as_i8imm $format), (as_i8imm $format),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>; >;
def : GCNPat< def : GCNPat<
(vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
timm:$format, timm:$cachepolicy, timm)), imm:$format, imm:$cachepolicy, imm)),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset), (!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
(as_i8imm $format), (as_i8imm $format),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>; >;
def : GCNPat< def : GCNPat<
(vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
timm:$format, timm:$cachepolicy, 0)), imm:$format, imm:$cachepolicy, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset), (!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
(as_i8imm $format), (as_i8imm $format),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>; >;
def : GCNPat< def : GCNPat<
(vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
timm:$format, timm:$cachepolicy, timm)), imm:$format, imm:$cachepolicy, imm)),
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN) (!cast<MTBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset), $rsrc, $soffset, (as_i16imm $offset),
@@ -1700,24 +1700,24 @@ let SubtargetPredicate = HasPackedD16VMem in {
multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> { string opcode> {
def : GCNPat< def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
timm:$format, timm:$cachepolicy, 0), imm:$format, imm:$cachepolicy, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset,
(as_i16imm $offset), (as_i8imm $format), (as_i16imm $offset), (as_i8imm $format),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>; >;
def : GCNPat< def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
timm:$format, timm:$cachepolicy, timm), imm:$format, imm:$cachepolicy, imm),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset, (!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
(as_i16imm $offset), (as_i8imm $format), (as_i16imm $offset), (as_i8imm $format),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>; >;
def : GCNPat< def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
timm:$format, timm:$cachepolicy, 0), imm:$format, imm:$cachepolicy, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset, (!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
(as_i16imm $offset), (as_i8imm $format), (as_i16imm $offset), (as_i8imm $format),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
@@ -1725,7 +1725,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
def : GCNPat< def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
timm:$offset, timm:$format, timm:$cachepolicy, timm), imm:$offset, imm:$format, imm:$cachepolicy, imm),
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact) (!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact)
$vdata, $vdata,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),

@@ -603,7 +603,7 @@ def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
def : GCNPat < def : GCNPat <
(int_amdgcn_ds_swizzle i32:$src, timm:$offset16), (int_amdgcn_ds_swizzle i32:$src, imm:$offset16),
(DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0)) (DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0))
>; >;

@@ -5666,14 +5666,14 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
SDVTList VTList = DAG.getVTList({LoadVT, MVT::Glue}); SDVTList VTList = DAG.getVTList({LoadVT, MVT::Glue});
unsigned CachePolicy = cast<ConstantSDNode>(GLC)->getZExtValue(); unsigned CachePolicy = cast<ConstantSDNode>(GLC)->getZExtValue();
SDValue Ops[] = { SDValue Ops[] = {
DAG.getEntryNode(), // Chain DAG.getEntryNode(), // Chain
Rsrc, // rsrc Rsrc, // rsrc
DAG.getConstant(0, DL, MVT::i32), // vindex DAG.getConstant(0, DL, MVT::i32), // vindex
{}, // voffset {}, // voffset
{}, // soffset {}, // soffset
{}, // offset {}, // offset
DAG.getTargetConstant(CachePolicy, DL, MVT::i32), // cachepolicy DAG.getConstant(CachePolicy, DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(0, DL, MVT::i1), // idxen DAG.getConstant(0, DL, MVT::i1), // idxen
}; };
// Use the alignment to ensure that the required offsets will fit into the // Use the alignment to ensure that the required offsets will fit into the
@@ -5682,7 +5682,7 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
uint64_t InstOffset = cast<ConstantSDNode>(Ops[5])->getZExtValue(); uint64_t InstOffset = cast<ConstantSDNode>(Ops[5])->getZExtValue();
for (unsigned i = 0; i < NumLoads; ++i) { for (unsigned i = 0; i < NumLoads; ++i) {
Ops[5] = DAG.getTargetConstant(InstOffset + 16 * i, DL, MVT::i32); Ops[5] = DAG.getConstant(InstOffset + 16 * i, DL, MVT::i32);
Loads.push_back(DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD, DL, VTList, Loads.push_back(DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD, DL, VTList,
Ops, LoadVT, MMO)); Ops, LoadVT, MMO));
} }
@@ -5894,12 +5894,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1), // Src0 Op.getOperand(1), // Src0
Op.getOperand(2), // Attrchan Op.getOperand(2), // Attrchan
Op.getOperand(3), // Attr Op.getOperand(3), // Attr
DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
S, // Src2 - holds two f16 values selected by high S, // Src2 - holds two f16 values selected by high
DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers
Op.getOperand(4), // high Op.getOperand(4), // high
DAG.getTargetConstant(0, DL, MVT::i1), // $clamp DAG.getConstant(0, DL, MVT::i1), // $clamp
DAG.getTargetConstant(0, DL, MVT::i32) // $omod DAG.getConstant(0, DL, MVT::i32) // $omod
}; };
return DAG.getNode(AMDGPUISD::INTERP_P1LV_F16, DL, MVT::f32, Ops); return DAG.getNode(AMDGPUISD::INTERP_P1LV_F16, DL, MVT::f32, Ops);
} else { } else {
@@ -5908,10 +5908,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1), // Src0 Op.getOperand(1), // Src0
Op.getOperand(2), // Attrchan Op.getOperand(2), // Attrchan
Op.getOperand(3), // Attr Op.getOperand(3), // Attr
DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
Op.getOperand(4), // high Op.getOperand(4), // high
DAG.getTargetConstant(0, DL, MVT::i1), // $clamp DAG.getConstant(0, DL, MVT::i1), // $clamp
DAG.getTargetConstant(0, DL, MVT::i32), // $omod DAG.getConstant(0, DL, MVT::i32), // $omod
Glue Glue
}; };
return DAG.getNode(AMDGPUISD::INTERP_P1LL_F16, DL, MVT::f32, Ops); return DAG.getNode(AMDGPUISD::INTERP_P1LL_F16, DL, MVT::f32, Ops);
@@ -5924,11 +5924,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(2), // Src0 Op.getOperand(2), // Src0
Op.getOperand(3), // Attrchan Op.getOperand(3), // Attrchan
Op.getOperand(4), // Attr Op.getOperand(4), // Attr
DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
Op.getOperand(1), // Src2 Op.getOperand(1), // Src2
DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers
Op.getOperand(5), // high Op.getOperand(5), // high
DAG.getTargetConstant(0, DL, MVT::i1), // $clamp DAG.getConstant(0, DL, MVT::i1), // $clamp
Glue Glue
}; };
return DAG.getNode(AMDGPUISD::INTERP_P2_F16, DL, MVT::f16, Ops); return DAG.getNode(AMDGPUISD::INTERP_P2_F16, DL, MVT::f16, Ops);
@@ -6234,8 +6234,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SDValue(), // voffset -- will be set by setBufferOffsets SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets SDValue(), // offset -- will be set by setBufferOffsets
DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
}; };
setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]); setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]);
@@ -6272,7 +6272,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(4), // soffset Op.getOperand(4), // soffset
Offsets.second, // offset Offsets.second, // offset
Op.getOperand(5), // cachepolicy Op.getOperand(5), // cachepolicy
DAG.getTargetConstant(0, DL, MVT::i1), // idxen DAG.getConstant(0, DL, MVT::i1), // idxen
}; };
return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops); return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops);
@@ -6290,7 +6290,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(5), // soffset Op.getOperand(5), // soffset
Offsets.second, // offset Offsets.second, // offset
Op.getOperand(6), // cachepolicy Op.getOperand(6), // cachepolicy
DAG.getTargetConstant(1, DL, MVT::i1), // idxen DAG.getConstant(1, DL, MVT::i1), // idxen
}; };
return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops); return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops);
@@ -6313,9 +6313,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(4), // voffset Op.getOperand(4), // voffset
Op.getOperand(5), // soffset Op.getOperand(5), // soffset
Op.getOperand(6), // offset Op.getOperand(6), // offset
DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1) // idxen DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
}; };
if (LoadVT.getScalarType() == MVT::f16) if (LoadVT.getScalarType() == MVT::f16)
@@ -6339,7 +6339,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Offsets.second, // offset Offsets.second, // offset
Op.getOperand(5), // format Op.getOperand(5), // format
Op.getOperand(6), // cachepolicy Op.getOperand(6), // cachepolicy
DAG.getTargetConstant(0, DL, MVT::i1), // idxen DAG.getConstant(0, DL, MVT::i1), // idxen
}; };
if (LoadVT.getScalarType() == MVT::f16) if (LoadVT.getScalarType() == MVT::f16)
@@ -6363,7 +6363,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Offsets.second, // offset Offsets.second, // offset
Op.getOperand(6), // format Op.getOperand(6), // format
Op.getOperand(7), // cachepolicy Op.getOperand(7), // cachepolicy
DAG.getTargetConstant(1, DL, MVT::i1), // idxen DAG.getConstant(1, DL, MVT::i1), // idxen
}; };
if (LoadVT.getScalarType() == MVT::f16) if (LoadVT.getScalarType() == MVT::f16)
@@ -6395,8 +6395,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SDValue(), // voffset -- will be set by setBufferOffsets SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets SDValue(), // offset -- will be set by setBufferOffsets
DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
}; };
setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
EVT VT = Op.getValueType(); EVT VT = Op.getValueType();
@@ -6464,7 +6464,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(5), // soffset Op.getOperand(5), // soffset
Offsets.second, // offset Offsets.second, // offset
Op.getOperand(6), // cachepolicy Op.getOperand(6), // cachepolicy
DAG.getTargetConstant(0, DL, MVT::i1), // idxen DAG.getConstant(0, DL, MVT::i1), // idxen
}; };
EVT VT = Op.getValueType(); EVT VT = Op.getValueType();
@@ -6537,7 +6537,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(6), // soffset Op.getOperand(6), // soffset
Offsets.second, // offset Offsets.second, // offset
Op.getOperand(7), // cachepolicy Op.getOperand(7), // cachepolicy
DAG.getTargetConstant(1, DL, MVT::i1), // idxen DAG.getConstant(1, DL, MVT::i1), // idxen
}; };
EVT VT = Op.getValueType(); EVT VT = Op.getValueType();
@@ -6602,8 +6602,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SDValue(), // voffset -- will be set by setBufferOffsets SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets SDValue(), // offset -- will be set by setBufferOffsets
DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
}; };
setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]); setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]);
EVT VT = Op.getValueType(); EVT VT = Op.getValueType();
@@ -6624,7 +6624,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(6), // soffset Op.getOperand(6), // soffset
Offsets.second, // offset Offsets.second, // offset
Op.getOperand(7), // cachepolicy Op.getOperand(7), // cachepolicy
DAG.getTargetConstant(0, DL, MVT::i1), // idxen DAG.getConstant(0, DL, MVT::i1), // idxen
}; };
EVT VT = Op.getValueType(); EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op); auto *M = cast<MemSDNode>(Op);
@@ -6644,7 +6644,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(7), // soffset Op.getOperand(7), // soffset
Offsets.second, // offset Offsets.second, // offset
Op.getOperand(8), // cachepolicy Op.getOperand(8), // cachepolicy
DAG.getTargetConstant(1, DL, MVT::i1), // idxen DAG.getConstant(1, DL, MVT::i1), // idxen
}; };
EVT VT = Op.getValueType(); EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op); auto *M = cast<MemSDNode>(Op);
@@ -6806,9 +6806,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Op.getOperand(5), // voffset Op.getOperand(5), // voffset
Op.getOperand(6), // soffset Op.getOperand(6), // soffset
Op.getOperand(7), // offset Op.getOperand(7), // offset
DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idexen DAG.getConstant(IdxEn, DL, MVT::i1), // idexen
}; };
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 : unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT; AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -6833,7 +6833,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Offsets.second, // offset Offsets.second, // offset
Op.getOperand(7), // format Op.getOperand(7), // format
Op.getOperand(8), // cachepolicy Op.getOperand(8), // cachepolicy
DAG.getTargetConstant(1, DL, MVT::i1), // idexen DAG.getConstant(1, DL, MVT::i1), // idexen
}; };
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 : unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT; AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -6858,7 +6858,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Offsets.second, // offset Offsets.second, // offset
Op.getOperand(6), // format Op.getOperand(6), // format
Op.getOperand(7), // cachepolicy Op.getOperand(7), // cachepolicy
DAG.getTargetConstant(0, DL, MVT::i1), // idexen DAG.getConstant(0, DL, MVT::i1), // idexen
}; };
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 : unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT; AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -6886,8 +6886,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SDValue(), // voffset -- will be set by setBufferOffsets SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets SDValue(), // offset -- will be set by setBufferOffsets
DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
}; };
setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
unsigned Opc = IntrinsicID == Intrinsic::amdgcn_buffer_store ? unsigned Opc = IntrinsicID == Intrinsic::amdgcn_buffer_store ?
@@ -6932,7 +6932,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Op.getOperand(5), // soffset Op.getOperand(5), // soffset
Offsets.second, // offset Offsets.second, // offset
Op.getOperand(6), // cachepolicy Op.getOperand(6), // cachepolicy
DAG.getTargetConstant(0, DL, MVT::i1), // idxen DAG.getConstant(0, DL, MVT::i1), // idxen
}; };
unsigned Opc = unsigned Opc =
IsFormat ? AMDGPUISD::BUFFER_STORE_FORMAT : AMDGPUISD::BUFFER_STORE; IsFormat ? AMDGPUISD::BUFFER_STORE_FORMAT : AMDGPUISD::BUFFER_STORE;
@@ -6976,7 +6976,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Op.getOperand(6), // soffset Op.getOperand(6), // soffset
Offsets.second, // offset Offsets.second, // offset
Op.getOperand(7), // cachepolicy Op.getOperand(7), // cachepolicy
DAG.getTargetConstant(1, DL, MVT::i1), // idxen DAG.getConstant(1, DL, MVT::i1), // idxen
}; };
unsigned Opc = IntrinsicID == Intrinsic::amdgcn_struct_buffer_store ? unsigned Opc = IntrinsicID == Intrinsic::amdgcn_struct_buffer_store ?
AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT; AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
@@ -7005,8 +7005,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SDValue(), // voffset -- will be set by setBufferOffsets SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets SDValue(), // offset -- will be set by setBufferOffsets
DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
}; };
setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
EVT VT = Op.getOperand(2).getValueType(); EVT VT = Op.getOperand(2).getValueType();
@@ -7084,7 +7084,7 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
Overflow += ImmOffset; Overflow += ImmOffset;
ImmOffset = 0; ImmOffset = 0;
} }
C1 = cast<ConstantSDNode>(DAG.getTargetConstant(ImmOffset, DL, MVT::i32)); C1 = cast<ConstantSDNode>(DAG.getConstant(ImmOffset, DL, MVT::i32));
if (Overflow) { if (Overflow) {
auto OverflowVal = DAG.getConstant(Overflow, DL, MVT::i32); auto OverflowVal = DAG.getConstant(Overflow, DL, MVT::i32);
if (!N0) if (!N0)
@@ -7098,7 +7098,7 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
if (!N0) if (!N0)
N0 = DAG.getConstant(0, DL, MVT::i32); N0 = DAG.getConstant(0, DL, MVT::i32);
if (!C1) if (!C1)
C1 = cast<ConstantSDNode>(DAG.getTargetConstant(0, DL, MVT::i32)); C1 = cast<ConstantSDNode>(DAG.getConstant(0, DL, MVT::i32));
return {N0, SDValue(C1, 0)}; return {N0, SDValue(C1, 0)};
} }
@@ -7115,7 +7115,7 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
if (AMDGPU::splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget, Align)) { if (AMDGPU::splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget, Align)) {
Offsets[0] = DAG.getConstant(0, DL, MVT::i32); Offsets[0] = DAG.getConstant(0, DL, MVT::i32);
Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32); Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32); Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
return; return;
} }
} }
@@ -7128,13 +7128,13 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
Subtarget, Align)) { Subtarget, Align)) {
Offsets[0] = N0; Offsets[0] = N0;
Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32); Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32); Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
return; return;
} }
} }
Offsets[0] = CombinedOffset; Offsets[0] = CombinedOffset;
Offsets[1] = DAG.getConstant(0, DL, MVT::i32); Offsets[1] = DAG.getConstant(0, DL, MVT::i32);
Offsets[2] = DAG.getTargetConstant(0, DL, MVT::i32); Offsets[2] = DAG.getConstant(0, DL, MVT::i32);
} }
// Handle 8 bit and 16 bit buffer loads // Handle 8 bit and 16 bit buffer loads

@@ -43,8 +43,8 @@ multiclass V_INTERP_P1_F32_m : VINTRP_m <
(outs VINTRPDst:$vdst), (outs VINTRPDst:$vdst),
(ins VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan), (ins VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
"v_interp_p1_f32$vdst, $vsrc, $attr$attrchan", "v_interp_p1_f32$vdst, $vsrc, $attr$attrchan",
[(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 timm:$attrchan), [(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 imm:$attrchan),
(i32 timm:$attr)))] (i32 imm:$attr)))]
>; >;
let OtherPredicates = [has32BankLDS] in { let OtherPredicates = [has32BankLDS] in {
@@ -66,8 +66,8 @@ defm V_INTERP_P2_F32 : VINTRP_m <
(outs VINTRPDst:$vdst), (outs VINTRPDst:$vdst),
(ins VGPR_32:$src0, VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan), (ins VGPR_32:$src0, VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
"v_interp_p2_f32$vdst, $vsrc, $attr$attrchan", "v_interp_p2_f32$vdst, $vsrc, $attr$attrchan",
[(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 timm:$attrchan), [(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 imm:$attrchan),
(i32 timm:$attr)))]>; (i32 imm:$attr)))]>;
} // End DisableEncoding = "$src0", Constraints = "$src0 = $vdst" } // End DisableEncoding = "$src0", Constraints = "$src0 = $vdst"
@@ -76,8 +76,8 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
(outs VINTRPDst:$vdst), (outs VINTRPDst:$vdst),
(ins InterpSlot:$vsrc, Attr:$attr, AttrChan:$attrchan), (ins InterpSlot:$vsrc, Attr:$attr, AttrChan:$attrchan),
"v_interp_mov_f32$vdst, $vsrc, $attr$attrchan", "v_interp_mov_f32$vdst, $vsrc, $attr$attrchan",
[(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 timm:$attrchan), [(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 imm:$attrchan),
(i32 timm:$attr)))]>; (i32 imm:$attr)))]>;
} // End Uses = [M0, EXEC] } // End Uses = [M0, EXEC]

@@ -1090,7 +1090,7 @@ def S_WAKEUP : SOPP <0x00000003, (ins), "s_wakeup"> {
let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16", def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16",
[(int_amdgcn_s_waitcnt timm:$simm16)]>; [(int_amdgcn_s_waitcnt UIMM16bit:$simm16)]>;
def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">; def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">; def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
@@ -1099,7 +1099,7 @@ def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
// maximum reported is 960 cycles, so 960 / 64 = 15 max, so is the // maximum reported is 960 cycles, so 960 / 64 = 15 max, so is the
// maximum really 15 on VI? // maximum really 15 on VI?
def S_SLEEP : SOPP <0x0000000e, (ins i32imm:$simm16), def S_SLEEP : SOPP <0x0000000e, (ins i32imm:$simm16),
"s_sleep $simm16", [(int_amdgcn_s_sleep timm:$simm16)]> { "s_sleep $simm16", [(int_amdgcn_s_sleep SIMM16bit:$simm16)]> {
let hasSideEffects = 1; let hasSideEffects = 1;
let mayLoad = 1; let mayLoad = 1;
let mayStore = 1; let mayStore = 1;
@@ -1110,10 +1110,10 @@ def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">;
let Uses = [EXEC, M0] in { let Uses = [EXEC, M0] in {
// FIXME: Should this be mayLoad+mayStore? // FIXME: Should this be mayLoad+mayStore?
def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16", def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
[(int_amdgcn_s_sendmsg (i32 timm:$simm16), M0)]>; [(int_amdgcn_s_sendmsg (i32 imm:$simm16), M0)]>;
def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16", def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16",
[(int_amdgcn_s_sendmsghalt (i32 timm:$simm16), M0)]>; [(int_amdgcn_s_sendmsghalt (i32 imm:$simm16), M0)]>;
} // End Uses = [EXEC, M0] } // End Uses = [EXEC, M0]
@@ -1125,13 +1125,13 @@ def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
let simm16 = 0; let simm16 = 0;
} }
def S_INCPERFLEVEL : SOPP <0x00000014, (ins i32imm:$simm16), "s_incperflevel $simm16", def S_INCPERFLEVEL : SOPP <0x00000014, (ins i32imm:$simm16), "s_incperflevel $simm16",
[(int_amdgcn_s_incperflevel timm:$simm16)]> { [(int_amdgcn_s_incperflevel SIMM16bit:$simm16)]> {
let hasSideEffects = 1; let hasSideEffects = 1;
let mayLoad = 1; let mayLoad = 1;
let mayStore = 1; let mayStore = 1;
} }
def S_DECPERFLEVEL : SOPP <0x00000015, (ins i32imm:$simm16), "s_decperflevel $simm16", def S_DECPERFLEVEL : SOPP <0x00000015, (ins i32imm:$simm16), "s_decperflevel $simm16",
[(int_amdgcn_s_decperflevel timm:$simm16)]> { [(int_amdgcn_s_decperflevel SIMM16bit:$simm16)]> {
let hasSideEffects = 1; let hasSideEffects = 1;
let mayLoad = 1; let mayLoad = 1;
let mayStore = 1; let mayStore = 1;
@@ -1180,7 +1180,7 @@ let SubtargetPredicate = isGFX10Plus in {
// S_GETREG_B32 Intrinsic Pattern. // S_GETREG_B32 Intrinsic Pattern.
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
def : GCNPat < def : GCNPat <
(int_amdgcn_s_getreg timm:$simm16), (int_amdgcn_s_getreg imm:$simm16),
(S_GETREG_B32 (as_i16imm $simm16)) (S_GETREG_B32 (as_i16imm $simm16))
>; >;

@@ -841,16 +841,16 @@ def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo<VReg_512>;
let OtherPredicates = [isGFX8GFX9] in { let OtherPredicates = [isGFX8GFX9] in {
def : GCNPat < def : GCNPat <
(i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask, (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
timm:$bound_ctrl)), imm:$bound_ctrl)),
(V_MOV_B32_dpp $src, $src, (as_i32imm $dpp_ctrl), (V_MOV_B32_dpp $src, $src, (as_i32imm $dpp_ctrl),
(as_i32imm $row_mask), (as_i32imm $bank_mask), (as_i32imm $row_mask), (as_i32imm $bank_mask),
(as_i1imm $bound_ctrl)) (as_i1imm $bound_ctrl))
>; >;
def : GCNPat < def : GCNPat <
(i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl, timm:$row_mask, (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
timm:$bank_mask, timm:$bound_ctrl)), imm:$bank_mask, imm:$bound_ctrl)),
(V_MOV_B32_dpp $old, $src, (as_i32imm $dpp_ctrl), (V_MOV_B32_dpp $old, $src, (as_i32imm $dpp_ctrl),
(as_i32imm $row_mask), (as_i32imm $bank_mask), (as_i32imm $row_mask), (as_i32imm $bank_mask),
(as_i1imm $bound_ctrl)) (as_i1imm $bound_ctrl))
@@ -911,21 +911,21 @@ defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
let OtherPredicates = [isGFX10Plus] in { let OtherPredicates = [isGFX10Plus] in {
def : GCNPat < def : GCNPat <
(i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)), (i32 (int_amdgcn_mov_dpp8 i32:$src, imm:$dpp8)),
(V_MOV_B32_dpp8_gfx10 $src, $src, (as_i32imm $dpp8), (i32 DPP8Mode.FI_0)) (V_MOV_B32_dpp8_gfx10 $src, $src, (as_i32imm $dpp8), (i32 DPP8Mode.FI_0))
>; >;
def : GCNPat < def : GCNPat <
(i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask, (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
timm:$bound_ctrl)), imm:$bound_ctrl)),
(V_MOV_B32_dpp_gfx10 $src, $src, (as_i32imm $dpp_ctrl), (V_MOV_B32_dpp_gfx10 $src, $src, (as_i32imm $dpp_ctrl),
(as_i32imm $row_mask), (as_i32imm $bank_mask), (as_i32imm $row_mask), (as_i32imm $bank_mask),
(as_i1imm $bound_ctrl), (i32 0)) (as_i1imm $bound_ctrl), (i32 0))
>; >;
def : GCNPat < def : GCNPat <
(i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl, timm:$row_mask, (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
timm:$bank_mask, timm:$bound_ctrl)), imm:$bank_mask, imm:$bound_ctrl)),
(V_MOV_B32_dpp_gfx10 $old, $src, (as_i32imm $dpp_ctrl), (V_MOV_B32_dpp_gfx10 $old, $src, (as_i32imm $dpp_ctrl),
(as_i32imm $row_mask), (as_i32imm $bank_mask), (as_i32imm $row_mask), (as_i32imm $bank_mask),
(as_i1imm $bound_ctrl), (i32 0)) (as_i1imm $bound_ctrl), (i32 0))

View File

@ -112,7 +112,7 @@ class getVOP3ClampPat<VOPProfile P, SDPatternOperator node> {
class getVOP3MAIPat<VOPProfile P, SDPatternOperator node> { class getVOP3MAIPat<VOPProfile P, SDPatternOperator node> {
list<dag> ret = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, list<dag> ret = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2,
timm:$cbsz, timm:$abid, timm:$blgp))]; imm:$cbsz, imm:$abid, imm:$blgp))];
} }
class VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit VOP3Only = 0> : class VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit VOP3Only = 0> :
@@ -453,13 +453,13 @@ let FPDPRounding = 1 in {
def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>; def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
let Uses = [M0, EXEC] in { let Uses = [M0, EXEC] in {
def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>, def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>,
[(set f16:$vdst, (AMDGPUinterp_p2_f16 f32:$src0, (i32 timm:$attrchan), [(set f16:$vdst, (AMDGPUinterp_p2_f16 f32:$src0, (i32 imm:$attrchan),
(i32 timm:$attr), (i32 imm:$attr),
(i32 timm:$src0_modifiers), (i32 imm:$src0_modifiers),
(f32 VRegSrc_32:$src2), (f32 VRegSrc_32:$src2),
(i32 timm:$src2_modifiers), (i32 imm:$src2_modifiers),
(i1 timm:$high), (i1 imm:$high),
(i1 timm:$clamp)))]>; (i1 imm:$clamp)))]>;
} // End Uses = [M0, EXEC] } // End Uses = [M0, EXEC]
} // End FPDPRounding = 1 } // End FPDPRounding = 1
} // End renamedInGFX9 = 1 } // End renamedInGFX9 = 1
@@ -478,21 +478,21 @@ def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f1
let Uses = [M0, EXEC], FPDPRounding = 1 in { let Uses = [M0, EXEC], FPDPRounding = 1 in {
def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>, def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>,
[(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 timm:$attrchan), [(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 imm:$attrchan),
(i32 timm:$attr), (i32 imm:$attr),
(i32 timm:$src0_modifiers), (i32 imm:$src0_modifiers),
(i1 timm:$high), (i1 imm:$high),
(i1 timm:$clamp), (i1 imm:$clamp),
(i32 timm:$omod)))]>; (i32 imm:$omod)))]>;
def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>, def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>,
[(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 timm:$attrchan), [(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 imm:$attrchan),
(i32 timm:$attr), (i32 imm:$attr),
(i32 timm:$src0_modifiers), (i32 imm:$src0_modifiers),
(f32 VRegSrc_32:$src2), (f32 VRegSrc_32:$src2),
(i32 timm:$src2_modifiers), (i32 imm:$src2_modifiers),
(i1 timm:$high), (i1 imm:$high),
(i1 timm:$clamp), (i1 imm:$clamp),
(i32 timm:$omod)))]>; (i32 imm:$omod)))]>;
} // End Uses = [M0, EXEC], FPDPRounding = 1 } // End Uses = [M0, EXEC], FPDPRounding = 1
} // End SubtargetPredicate = Has16BitInsts, isCommutable = 1 } // End SubtargetPredicate = Has16BitInsts, isCommutable = 1
@@ -642,11 +642,11 @@ let SubtargetPredicate = isGFX10Plus in {
} // End $vdst = $vdst_in, DisableEncoding $vdst_in } // End $vdst = $vdst_in, DisableEncoding $vdst_in
def : GCNPat< def : GCNPat<
(int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc), (int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc),
(V_PERMLANE16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in) (V_PERMLANE16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
>; >;
def : GCNPat< def : GCNPat<
(int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc), (int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc),
(V_PERMLANEX16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in) (V_PERMLANEX16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
>; >;
} // End SubtargetPredicate = isGFX10Plus } // End SubtargetPredicate = isGFX10Plus

@@ -3116,12 +3116,12 @@ ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
// Load the current TEB (thread environment block) // Load the current TEB (thread environment block)
SDValue Ops[] = {Chain, SDValue Ops[] = {Chain,
DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32), DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
DAG.getTargetConstant(15, DL, MVT::i32), DAG.getConstant(15, DL, MVT::i32),
DAG.getTargetConstant(0, DL, MVT::i32), DAG.getConstant(0, DL, MVT::i32),
DAG.getTargetConstant(13, DL, MVT::i32), DAG.getConstant(13, DL, MVT::i32),
DAG.getTargetConstant(0, DL, MVT::i32), DAG.getConstant(0, DL, MVT::i32),
DAG.getTargetConstant(2, DL, MVT::i32)}; DAG.getConstant(2, DL, MVT::i32)};
SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
DAG.getVTList(MVT::i32, MVT::Other), Ops); DAG.getVTList(MVT::i32, MVT::Other), Ops);
@@ -8910,12 +8910,12 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
// Under Power Management extensions, the cycle-count is: // Under Power Management extensions, the cycle-count is:
// mrc p15, #0, <Rt>, c9, c13, #0 // mrc p15, #0, <Rt>, c9, c13, #0
SDValue Ops[] = { N->getOperand(0), // Chain SDValue Ops[] = { N->getOperand(0), // Chain
DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32), DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
DAG.getTargetConstant(15, DL, MVT::i32), DAG.getConstant(15, DL, MVT::i32),
DAG.getTargetConstant(0, DL, MVT::i32), DAG.getConstant(0, DL, MVT::i32),
DAG.getTargetConstant(9, DL, MVT::i32), DAG.getConstant(9, DL, MVT::i32),
DAG.getTargetConstant(13, DL, MVT::i32), DAG.getConstant(13, DL, MVT::i32),
DAG.getTargetConstant(0, DL, MVT::i32) DAG.getConstant(0, DL, MVT::i32)
}; };
SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,

@@ -5110,8 +5110,8 @@ def SWPB: AIswp<1, (outs GPRnopc:$Rt),
def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn, [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
timm:$CRm, timm:$opc2)]>, imm:$CRm, imm:$opc2)]>,
Requires<[IsARM,PreV8]> { Requires<[IsARM,PreV8]> {
bits<4> opc1; bits<4> opc1;
bits<4> CRn; bits<4> CRn;
@@ -5134,8 +5134,8 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp2 timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn, [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
timm:$CRm, timm:$opc2)]>, imm:$CRm, imm:$opc2)]>,
Requires<[IsARM,PreV8]> { Requires<[IsARM,PreV8]> {
let Inst{31-28} = 0b1111; let Inst{31-28} = 0b1111;
bits<4> opc1; bits<4> opc1;
@@ -5314,15 +5314,15 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm, list<dag> pattern> {
} }
} }
defm LDC : LdStCop <1, 0, "ldc", [(int_arm_ldc timm:$cop, timm:$CRd, addrmode5:$addr)]>; defm LDC : LdStCop <1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm LDCL : LdStCop <1, 1, "ldcl", [(int_arm_ldcl timm:$cop, timm:$CRd, addrmode5:$addr)]>; defm LDCL : LdStCop <1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
defm STC : LdStCop <0, 0, "stc", [(int_arm_stc timm:$cop, timm:$CRd, addrmode5:$addr)]>; defm STC : LdStCop <0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm STCL : LdStCop <0, 1, "stcl", [(int_arm_stcl timm:$cop, timm:$CRd, addrmode5:$addr)]>; defm STCL : LdStCop <0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
} // DecoderNamespace = "CoProc" } // DecoderNamespace = "CoProc"
@@ -5358,8 +5358,8 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
(outs), (outs),
(ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2), c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn, [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
timm:$CRm, timm:$opc2)]>, imm:$CRm, imm:$opc2)]>,
ComplexDeprecationPredicate<"MCR">; ComplexDeprecationPredicate<"MCR">;
def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm", def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
(MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
@@ -5372,8 +5372,8 @@ def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm",
(MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, (MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, 0, pred:$p)>; c_imm:$CRm, 0, pred:$p)>;
def : ARMPat<(int_arm_mrc timm:$cop, timm:$opc1, timm:$CRn, timm:$CRm, timm:$opc2), def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
(MRC p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>; (MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
class MovRCopro2<string opc, bit direction, dag oops, dag iops, class MovRCopro2<string opc, bit direction, dag oops, dag iops,
list<dag> pattern> list<dag> pattern>
@@ -5404,8 +5404,8 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
(outs), (outs),
(ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2), c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn, [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
timm:$CRm, timm:$opc2)]>, imm:$CRm, imm:$opc2)]>,
Requires<[IsARM,PreV8]>; Requires<[IsARM,PreV8]>;
def : ARMInstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm", def : ARMInstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm",
(MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
@@ -5419,9 +5419,9 @@ def : ARMInstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm",
(MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, (MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, 0)>; c_imm:$CRm, 0)>;
def : ARMV5TPat<(int_arm_mrc2 timm:$cop, timm:$opc1, timm:$CRn, def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
timm:$CRm, timm:$opc2), imm:$CRm, imm:$opc2),
(MRC2 p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>; (MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
class MovRRCopro<string opc, bit direction, dag oops, dag iops, list<dag> class MovRRCopro<string opc, bit direction, dag oops, dag iops, list<dag>
pattern = []> pattern = []>
@@ -5447,8 +5447,8 @@ class MovRRCopro<string opc, bit direction, dag oops, dag iops, list<dag>
def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */, def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */,
(outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt, (outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt,
GPRnopc:$Rt2, c_imm:$CRm), GPRnopc:$Rt2, c_imm:$CRm),
[(int_arm_mcrr timm:$cop, timm:$opc1, GPRnopc:$Rt, [(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt,
GPRnopc:$Rt2, timm:$CRm)]>; GPRnopc:$Rt2, imm:$CRm)]>;
def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */, def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */,
(outs GPRnopc:$Rt, GPRnopc:$Rt2), (outs GPRnopc:$Rt, GPRnopc:$Rt2),
(ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm), []>; (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm), []>;
@@ -5480,8 +5480,8 @@ class MovRRCopro2<string opc, bit direction, dag oops, dag iops,
def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */, def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */,
(outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt, (outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt,
GPRnopc:$Rt2, c_imm:$CRm), GPRnopc:$Rt2, c_imm:$CRm),
[(int_arm_mcrr2 timm:$cop, timm:$opc1, GPRnopc:$Rt, [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPRnopc:$Rt,
GPRnopc:$Rt2, timm:$CRm)]>; GPRnopc:$Rt2, imm:$CRm)]>;
def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */, def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */,
(outs GPRnopc:$Rt, GPRnopc:$Rt2), (outs GPRnopc:$Rt, GPRnopc:$Rt2),
@@ -6159,7 +6159,7 @@ def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>,
let mayLoad = 1, mayStore =1, hasSideEffects = 1 in let mayLoad = 1, mayStore =1, hasSideEffects = 1 in
def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn), def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
NoItinerary, NoItinerary,
[(set GPR:$Rd, (int_arm_space timm:$size, GPR:$Rn))]>; [(set GPR:$Rd, (int_arm_space imm:$size, GPR:$Rn))]>;
//===---------------------------------- //===----------------------------------
// Atomic cmpxchg for -O0 // Atomic cmpxchg for -O0

@@ -4175,15 +4175,15 @@ multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm, list<dag>
} }
let DecoderNamespace = "Thumb2CoProc" in { let DecoderNamespace = "Thumb2CoProc" in {
defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc", [(int_arm_ldc timm:$cop, timm:$CRd, addrmode5:$addr)]>; defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl", [(int_arm_ldcl timm:$cop, timm:$CRd, addrmode5:$addr)]>; defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2", [(int_arm_ldc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l", [(int_arm_ldc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
defm t2STC : t2LdStCop<0b1110, 0, 0, "stc", [(int_arm_stc timm:$cop, timm:$CRd, addrmode5:$addr)]>; defm t2STC : t2LdStCop<0b1110, 0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl", [(int_arm_stcl timm:$cop, timm:$CRd, addrmode5:$addr)]>; defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2", [(int_arm_stc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l", [(int_arm_stc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
} }
@@ -4368,8 +4368,8 @@ def t2MCR : t2MovRCopro<0b1110, "mcr", 0,
(outs), (outs),
(ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2), c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn, [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
timm:$CRm, timm:$opc2)]>, imm:$CRm, imm:$opc2)]>,
ComplexDeprecationPredicate<"MCR">; ComplexDeprecationPredicate<"MCR">;
def : t2InstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm", def : t2InstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
(t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, (t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
@@ -4377,8 +4377,8 @@ def : t2InstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0, def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0,
(outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2), c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn, [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
timm:$CRm, timm:$opc2)]> { imm:$CRm, imm:$opc2)]> {
let Predicates = [IsThumb2, PreV8]; let Predicates = [IsThumb2, PreV8];
} }
def : t2InstAlias<"mcr2${p} $cop, $opc1, $Rt, $CRn, $CRm", def : t2InstAlias<"mcr2${p} $cop, $opc1, $Rt, $CRn, $CRm",
@@ -4402,24 +4402,24 @@ def : t2InstAlias<"mrc2${p} $cop, $opc1, $Rt, $CRn, $CRm",
(t2MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, (t2MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, 0, pred:$p)>; c_imm:$CRm, 0, pred:$p)>;
def : T2v6Pat<(int_arm_mrc timm:$cop, timm:$opc1, timm:$CRn, timm:$CRm, timm:$opc2), def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
(t2MRC p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>; (t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
def : T2v6Pat<(int_arm_mrc2 timm:$cop, timm:$opc1, timm:$CRn, timm:$CRm, timm:$opc2), def : T2v6Pat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
(t2MRC2 p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>; (t2MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
/* from ARM core register to coprocessor */ /* from ARM core register to coprocessor */
def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0, (outs), def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0, (outs),
(ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2,
c_imm:$CRm), c_imm:$CRm),
[(int_arm_mcrr timm:$cop, timm:$opc1, GPR:$Rt, GPR:$Rt2, [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2,
timm:$CRm)]>; imm:$CRm)]>;
def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0, (outs), def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0, (outs),
(ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2,
c_imm:$CRm), c_imm:$CRm),
[(int_arm_mcrr2 timm:$cop, timm:$opc1, GPR:$Rt, [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt,
GPR:$Rt2, timm:$CRm)]> { GPR:$Rt2, imm:$CRm)]> {
let Predicates = [IsThumb2, PreV8]; let Predicates = [IsThumb2, PreV8];
} }
@ -4439,8 +4439,8 @@ def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1, (outs GPR:$Rt, GPR:$Rt2),
def t2CDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, def t2CDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
"cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn, [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
timm:$CRm, timm:$opc2)]> { imm:$CRm, imm:$opc2)]> {
let Inst{27-24} = 0b1110; let Inst{27-24} = 0b1110;
bits<4> opc1; bits<4> opc1;
@ -4465,8 +4465,8 @@ def t2CDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1, def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
"cdp2", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", "cdp2", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp2 timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn, [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
timm:$CRm, timm:$opc2)]> { imm:$CRm, imm:$opc2)]> {
let Inst{27-24} = 0b1110; let Inst{27-24} = 0b1110;
bits<4> opc1; bits<4> opc1;

File diff suppressed because it is too large.
@ -8,125 +8,120 @@
// Automatically generated file, please consult code owner before editing. // Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
multiclass ImmOpPred<code pred, ValueType vt = i32> {
def "" : PatLeaf<(vt imm), pred>;
def _timm : PatLeaf<(vt timm), pred>;
}
def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; let RenderMethod = "addSignedImmOperands"; } def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand; let DecoderMethod = "s4_0ImmDecoder"; } def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand; let DecoderMethod = "s4_0ImmDecoder"; }
defm s4_0ImmPred : ImmOpPred<[{ return isShiftedInt<4, 0>(N->getSExtValue());}]>; def s4_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 0>(N->getSExtValue());}]>;
def s29_3ImmOperand : AsmOperandClass { let Name = "s29_3Imm"; let RenderMethod = "addSignedImmOperands"; } def s29_3ImmOperand : AsmOperandClass { let Name = "s29_3Imm"; let RenderMethod = "addSignedImmOperands"; }
def s29_3Imm : Operand<i32> { let ParserMatchClass = s29_3ImmOperand; let DecoderMethod = "s29_3ImmDecoder"; } def s29_3Imm : Operand<i32> { let ParserMatchClass = s29_3ImmOperand; let DecoderMethod = "s29_3ImmDecoder"; }
defm s29_3ImmPred : ImmOpPred<[{ return isShiftedInt<32, 3>(N->getSExtValue());}]>; def s29_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 3>(N->getSExtValue());}]>;
def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; let RenderMethod = "addImmOperands"; } def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; let RenderMethod = "addImmOperands"; }
def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u6_0ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 0>(N->getSExtValue());}]>; def u6_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 0>(N->getSExtValue());}]>;
def a30_2ImmOperand : AsmOperandClass { let Name = "a30_2Imm"; let RenderMethod = "addSignedImmOperands"; } def a30_2ImmOperand : AsmOperandClass { let Name = "a30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def a30_2Imm : Operand<i32> { let ParserMatchClass = a30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; } def a30_2Imm : Operand<i32> { let ParserMatchClass = a30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
defm a30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>; def a30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
def u29_3ImmOperand : AsmOperandClass { let Name = "u29_3Imm"; let RenderMethod = "addImmOperands"; } def u29_3ImmOperand : AsmOperandClass { let Name = "u29_3Imm"; let RenderMethod = "addImmOperands"; }
def u29_3Imm : Operand<i32> { let ParserMatchClass = u29_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } def u29_3Imm : Operand<i32> { let ParserMatchClass = u29_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u29_3ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 3>(N->getSExtValue());}]>; def u29_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 3>(N->getSExtValue());}]>;
def s8_0ImmOperand : AsmOperandClass { let Name = "s8_0Imm"; let RenderMethod = "addSignedImmOperands"; } def s8_0ImmOperand : AsmOperandClass { let Name = "s8_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s8_0Imm : Operand<i32> { let ParserMatchClass = s8_0ImmOperand; let DecoderMethod = "s8_0ImmDecoder"; } def s8_0Imm : Operand<i32> { let ParserMatchClass = s8_0ImmOperand; let DecoderMethod = "s8_0ImmDecoder"; }
defm s8_0ImmPred : ImmOpPred<[{ return isShiftedInt<8, 0>(N->getSExtValue());}]>; def s8_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<8, 0>(N->getSExtValue());}]>;
def u32_0ImmOperand : AsmOperandClass { let Name = "u32_0Imm"; let RenderMethod = "addImmOperands"; } def u32_0ImmOperand : AsmOperandClass { let Name = "u32_0Imm"; let RenderMethod = "addImmOperands"; }
def u32_0Imm : Operand<i32> { let ParserMatchClass = u32_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } def u32_0Imm : Operand<i32> { let ParserMatchClass = u32_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u32_0ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 0>(N->getSExtValue());}]>; def u32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 0>(N->getSExtValue());}]>;
def u4_2ImmOperand : AsmOperandClass { let Name = "u4_2Imm"; let RenderMethod = "addImmOperands"; } def u4_2ImmOperand : AsmOperandClass { let Name = "u4_2Imm"; let RenderMethod = "addImmOperands"; }
def u4_2Imm : Operand<i32> { let ParserMatchClass = u4_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } def u4_2Imm : Operand<i32> { let ParserMatchClass = u4_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u4_2ImmPred : ImmOpPred<[{ return isShiftedUInt<4, 2>(N->getSExtValue());}]>; def u4_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<4, 2>(N->getSExtValue());}]>;
def u3_0ImmOperand : AsmOperandClass { let Name = "u3_0Imm"; let RenderMethod = "addImmOperands"; } def u3_0ImmOperand : AsmOperandClass { let Name = "u3_0Imm"; let RenderMethod = "addImmOperands"; }
def u3_0Imm : Operand<i32> { let ParserMatchClass = u3_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } def u3_0Imm : Operand<i32> { let ParserMatchClass = u3_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u3_0ImmPred : ImmOpPred<[{ return isShiftedUInt<3, 0>(N->getSExtValue());}]>; def u3_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<3, 0>(N->getSExtValue());}]>;
def b15_2ImmOperand : AsmOperandClass { let Name = "b15_2Imm"; let RenderMethod = "addSignedImmOperands"; } def b15_2ImmOperand : AsmOperandClass { let Name = "b15_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def b15_2Imm : Operand<OtherVT> { let ParserMatchClass = b15_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; } def b15_2Imm : Operand<OtherVT> { let ParserMatchClass = b15_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
defm b15_2ImmPred : ImmOpPred<[{ return isShiftedInt<15, 2>(N->getSExtValue());}]>; def b15_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<15, 2>(N->getSExtValue());}]>;
def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; let RenderMethod = "addImmOperands"; } def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; let RenderMethod = "addImmOperands"; }
def u11_3Imm : Operand<i32> { let ParserMatchClass = u11_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } def u11_3Imm : Operand<i32> { let ParserMatchClass = u11_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u11_3ImmPred : ImmOpPred<[{ return isShiftedUInt<11, 3>(N->getSExtValue());}]>; def u11_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<11, 3>(N->getSExtValue());}]>;
def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; let RenderMethod = "addSignedImmOperands"; } def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; let RenderMethod = "addSignedImmOperands"; }
def s4_3Imm : Operand<i32> { let ParserMatchClass = s4_3ImmOperand; let DecoderMethod = "s4_3ImmDecoder"; } def s4_3Imm : Operand<i32> { let ParserMatchClass = s4_3ImmOperand; let DecoderMethod = "s4_3ImmDecoder"; }
defm s4_3ImmPred : ImmOpPred<[{ return isShiftedInt<4, 3>(N->getSExtValue());}]>; def s4_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 3>(N->getSExtValue());}]>;
def m32_0ImmOperand : AsmOperandClass { let Name = "m32_0Imm"; let RenderMethod = "addImmOperands"; } def m32_0ImmOperand : AsmOperandClass { let Name = "m32_0Imm"; let RenderMethod = "addImmOperands"; }
def m32_0Imm : Operand<i32> { let ParserMatchClass = m32_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } def m32_0Imm : Operand<i32> { let ParserMatchClass = m32_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm m32_0ImmPred : ImmOpPred<[{ return isShiftedInt<32, 0>(N->getSExtValue());}]>; def m32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
def u3_1ImmOperand : AsmOperandClass { let Name = "u3_1Imm"; let RenderMethod = "addImmOperands"; } def u3_1ImmOperand : AsmOperandClass { let Name = "u3_1Imm"; let RenderMethod = "addImmOperands"; }
def u3_1Imm : Operand<i32> { let ParserMatchClass = u3_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } def u3_1Imm : Operand<i32> { let ParserMatchClass = u3_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u3_1ImmPred : ImmOpPred<[{ return isShiftedUInt<3, 1>(N->getSExtValue());}]>; def u3_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<3, 1>(N->getSExtValue());}]>;
def u1_0ImmOperand : AsmOperandClass { let Name = "u1_0Imm"; let RenderMethod = "addImmOperands"; } def u1_0ImmOperand : AsmOperandClass { let Name = "u1_0Imm"; let RenderMethod = "addImmOperands"; }
def u1_0Imm : Operand<i32> { let ParserMatchClass = u1_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } def u1_0Imm : Operand<i32> { let ParserMatchClass = u1_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u1_0ImmPred : ImmOpPred<[{ return isShiftedUInt<1, 0>(N->getSExtValue());}]>; def u1_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<1, 0>(N->getSExtValue());}]>;
def s31_1ImmOperand : AsmOperandClass { let Name = "s31_1Imm"; let RenderMethod = "addSignedImmOperands"; } def s31_1ImmOperand : AsmOperandClass { let Name = "s31_1Imm"; let RenderMethod = "addSignedImmOperands"; }
def s31_1Imm : Operand<i32> { let ParserMatchClass = s31_1ImmOperand; let DecoderMethod = "s31_1ImmDecoder"; } def s31_1Imm : Operand<i32> { let ParserMatchClass = s31_1ImmOperand; let DecoderMethod = "s31_1ImmDecoder"; }
defm s31_1ImmPred : ImmOpPred<[{ return isShiftedInt<32, 1>(N->getSExtValue());}]>; def s31_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 1>(N->getSExtValue());}]>;
def s3_0ImmOperand : AsmOperandClass { let Name = "s3_0Imm"; let RenderMethod = "addSignedImmOperands"; } def s3_0ImmOperand : AsmOperandClass { let Name = "s3_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s3_0Imm : Operand<i32> { let ParserMatchClass = s3_0ImmOperand; let DecoderMethod = "s3_0ImmDecoder"; } def s3_0Imm : Operand<i32> { let ParserMatchClass = s3_0ImmOperand; let DecoderMethod = "s3_0ImmDecoder"; }
defm s3_0ImmPred : ImmOpPred<[{ return isShiftedInt<3, 0>(N->getSExtValue());}]>; def s3_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<3, 0>(N->getSExtValue());}]>;
def s30_2ImmOperand : AsmOperandClass { let Name = "s30_2Imm"; let RenderMethod = "addSignedImmOperands"; } def s30_2ImmOperand : AsmOperandClass { let Name = "s30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def s30_2Imm : Operand<i32> { let ParserMatchClass = s30_2ImmOperand; let DecoderMethod = "s30_2ImmDecoder"; } def s30_2Imm : Operand<i32> { let ParserMatchClass = s30_2ImmOperand; let DecoderMethod = "s30_2ImmDecoder"; }
defm s30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>; def s30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
def u4_0ImmOperand : AsmOperandClass { let Name = "u4_0Imm"; let RenderMethod = "addImmOperands"; } def u4_0ImmOperand : AsmOperandClass { let Name = "u4_0Imm"; let RenderMethod = "addImmOperands"; }
def u4_0Imm : Operand<i32> { let ParserMatchClass = u4_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } def u4_0Imm : Operand<i32> { let ParserMatchClass = u4_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u4_0ImmPred : ImmOpPred<[{ return isShiftedUInt<4, 0>(N->getSExtValue());}]>; def u4_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<4, 0>(N->getSExtValue());}]>;
def s6_0ImmOperand : AsmOperandClass { let Name = "s6_0Imm"; let RenderMethod = "addSignedImmOperands"; } def s6_0ImmOperand : AsmOperandClass { let Name = "s6_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s6_0Imm : Operand<i32> { let ParserMatchClass = s6_0ImmOperand; let DecoderMethod = "s6_0ImmDecoder"; } def s6_0Imm : Operand<i32> { let ParserMatchClass = s6_0ImmOperand; let DecoderMethod = "s6_0ImmDecoder"; }
defm s6_0ImmPred : ImmOpPred<[{ return isShiftedInt<6, 0>(N->getSExtValue());}]>; def s6_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<6, 0>(N->getSExtValue());}]>;
def u5_3ImmOperand : AsmOperandClass { let Name = "u5_3Imm"; let RenderMethod = "addImmOperands"; } def u5_3ImmOperand : AsmOperandClass { let Name = "u5_3Imm"; let RenderMethod = "addImmOperands"; }
def u5_3Imm : Operand<i32> { let ParserMatchClass = u5_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } def u5_3Imm : Operand<i32> { let ParserMatchClass = u5_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u5_3ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 3>(N->getSExtValue());}]>; def u5_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 3>(N->getSExtValue());}]>;
def s32_0ImmOperand : AsmOperandClass { let Name = "s32_0Imm"; let RenderMethod = "addSignedImmOperands"; } def s32_0ImmOperand : AsmOperandClass { let Name = "s32_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s32_0Imm : Operand<i32> { let ParserMatchClass = s32_0ImmOperand; let DecoderMethod = "s32_0ImmDecoder"; } def s32_0Imm : Operand<i32> { let ParserMatchClass = s32_0ImmOperand; let DecoderMethod = "s32_0ImmDecoder"; }
defm s32_0ImmPred : ImmOpPred<[{ return isShiftedInt<32, 0>(N->getSExtValue());}]>; def s32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
def s6_3ImmOperand : AsmOperandClass { let Name = "s6_3Imm"; let RenderMethod = "addSignedImmOperands"; } def s6_3ImmOperand : AsmOperandClass { let Name = "s6_3Imm"; let RenderMethod = "addSignedImmOperands"; }
def s6_3Imm : Operand<i32> { let ParserMatchClass = s6_3ImmOperand; let DecoderMethod = "s6_3ImmDecoder"; } def s6_3Imm : Operand<i32> { let ParserMatchClass = s6_3ImmOperand; let DecoderMethod = "s6_3ImmDecoder"; }
defm s6_3ImmPred : ImmOpPred<[{ return isShiftedInt<6, 3>(N->getSExtValue());}]>; def s6_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<6, 3>(N->getSExtValue());}]>;
def u10_0ImmOperand : AsmOperandClass { let Name = "u10_0Imm"; let RenderMethod = "addImmOperands"; } def u10_0ImmOperand : AsmOperandClass { let Name = "u10_0Imm"; let RenderMethod = "addImmOperands"; }
def u10_0Imm : Operand<i32> { let ParserMatchClass = u10_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } def u10_0Imm : Operand<i32> { let ParserMatchClass = u10_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u10_0ImmPred : ImmOpPred<[{ return isShiftedUInt<10, 0>(N->getSExtValue());}]>; def u10_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<10, 0>(N->getSExtValue());}]>;
def u31_1ImmOperand : AsmOperandClass { let Name = "u31_1Imm"; let RenderMethod = "addImmOperands"; } def u31_1ImmOperand : AsmOperandClass { let Name = "u31_1Imm"; let RenderMethod = "addImmOperands"; }
def u31_1Imm : Operand<i32> { let ParserMatchClass = u31_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } def u31_1Imm : Operand<i32> { let ParserMatchClass = u31_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u31_1ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 1>(N->getSExtValue());}]>; def u31_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 1>(N->getSExtValue());}]>;
def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; let RenderMethod = "addSignedImmOperands"; } def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; let RenderMethod = "addSignedImmOperands"; }
def s4_1Imm : Operand<i32> { let ParserMatchClass = s4_1ImmOperand; let DecoderMethod = "s4_1ImmDecoder"; } def s4_1Imm : Operand<i32> { let ParserMatchClass = s4_1ImmOperand; let DecoderMethod = "s4_1ImmDecoder"; }
defm s4_1ImmPred : ImmOpPred<[{ return isShiftedInt<4, 1>(N->getSExtValue());}]>; def s4_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 1>(N->getSExtValue());}]>;
def u16_0ImmOperand : AsmOperandClass { let Name = "u16_0Imm"; let RenderMethod = "addImmOperands"; } def u16_0ImmOperand : AsmOperandClass { let Name = "u16_0Imm"; let RenderMethod = "addImmOperands"; }
def u16_0Imm : Operand<i32> { let ParserMatchClass = u16_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } def u16_0Imm : Operand<i32> { let ParserMatchClass = u16_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u16_0ImmPred : ImmOpPred<[{ return isShiftedUInt<16, 0>(N->getSExtValue());}]>; def u16_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<16, 0>(N->getSExtValue());}]>;
def u6_1ImmOperand : AsmOperandClass { let Name = "u6_1Imm"; let RenderMethod = "addImmOperands"; } def u6_1ImmOperand : AsmOperandClass { let Name = "u6_1Imm"; let RenderMethod = "addImmOperands"; }
def u6_1Imm : Operand<i32> { let ParserMatchClass = u6_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } def u6_1Imm : Operand<i32> { let ParserMatchClass = u6_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u6_1ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 1>(N->getSExtValue());}]>; def u6_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 1>(N->getSExtValue());}]>;
def u5_2ImmOperand : AsmOperandClass { let Name = "u5_2Imm"; let RenderMethod = "addImmOperands"; } def u5_2ImmOperand : AsmOperandClass { let Name = "u5_2Imm"; let RenderMethod = "addImmOperands"; }
def u5_2Imm : Operand<i32> { let ParserMatchClass = u5_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } def u5_2Imm : Operand<i32> { let ParserMatchClass = u5_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u5_2ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 2>(N->getSExtValue());}]>; def u5_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 2>(N->getSExtValue());}]>;
def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; let RenderMethod = "addImmOperands"; } def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; let RenderMethod = "addImmOperands"; }
def u26_6Imm : Operand<i32> { let ParserMatchClass = u26_6ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } def u26_6Imm : Operand<i32> { let ParserMatchClass = u26_6ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u26_6ImmPred : ImmOpPred<[{ return isShiftedUInt<26, 6>(N->getSExtValue());}]>; def u26_6ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<26, 6>(N->getSExtValue());}]>;
def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; let RenderMethod = "addImmOperands"; } def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; let RenderMethod = "addImmOperands"; }
def u6_2Imm : Operand<i32> { let ParserMatchClass = u6_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } def u6_2Imm : Operand<i32> { let ParserMatchClass = u6_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u6_2ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 2>(N->getSExtValue());}]>; def u6_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 2>(N->getSExtValue());}]>;
def u7_0ImmOperand : AsmOperandClass { let Name = "u7_0Imm"; let RenderMethod = "addImmOperands"; } def u7_0ImmOperand : AsmOperandClass { let Name = "u7_0Imm"; let RenderMethod = "addImmOperands"; }
def u7_0Imm : Operand<i32> { let ParserMatchClass = u7_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } def u7_0Imm : Operand<i32> { let ParserMatchClass = u7_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u7_0ImmPred : ImmOpPred<[{ return isShiftedUInt<7, 0>(N->getSExtValue());}]>; def u7_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<7, 0>(N->getSExtValue());}]>;
def b13_2ImmOperand : AsmOperandClass { let Name = "b13_2Imm"; let RenderMethod = "addSignedImmOperands"; } def b13_2ImmOperand : AsmOperandClass { let Name = "b13_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def b13_2Imm : Operand<OtherVT> { let ParserMatchClass = b13_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; } def b13_2Imm : Operand<OtherVT> { let ParserMatchClass = b13_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
defm b13_2ImmPred : ImmOpPred<[{ return isShiftedInt<13, 2>(N->getSExtValue());}]>; def b13_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<13, 2>(N->getSExtValue());}]>;
def u5_0ImmOperand : AsmOperandClass { let Name = "u5_0Imm"; let RenderMethod = "addImmOperands"; } def u5_0ImmOperand : AsmOperandClass { let Name = "u5_0Imm"; let RenderMethod = "addImmOperands"; }
def u5_0Imm : Operand<i32> { let ParserMatchClass = u5_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } def u5_0Imm : Operand<i32> { let ParserMatchClass = u5_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u5_0ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 0>(N->getSExtValue());}]>; def u5_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 0>(N->getSExtValue());}]>;
def u2_0ImmOperand : AsmOperandClass { let Name = "u2_0Imm"; let RenderMethod = "addImmOperands"; } def u2_0ImmOperand : AsmOperandClass { let Name = "u2_0Imm"; let RenderMethod = "addImmOperands"; }
def u2_0Imm : Operand<i32> { let ParserMatchClass = u2_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } def u2_0Imm : Operand<i32> { let ParserMatchClass = u2_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u2_0ImmPred : ImmOpPred<[{ return isShiftedUInt<2, 0>(N->getSExtValue());}]>; def u2_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<2, 0>(N->getSExtValue());}]>;
def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; let RenderMethod = "addSignedImmOperands"; } def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def s4_2Imm : Operand<i32> { let ParserMatchClass = s4_2ImmOperand; let DecoderMethod = "s4_2ImmDecoder"; } def s4_2Imm : Operand<i32> { let ParserMatchClass = s4_2ImmOperand; let DecoderMethod = "s4_2ImmDecoder"; }
defm s4_2ImmPred : ImmOpPred<[{ return isShiftedInt<4, 2>(N->getSExtValue());}]>; def s4_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 2>(N->getSExtValue());}]>;
def b30_2ImmOperand : AsmOperandClass { let Name = "b30_2Imm"; let RenderMethod = "addSignedImmOperands"; } def b30_2ImmOperand : AsmOperandClass { let Name = "b30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def b30_2Imm : Operand<OtherVT> { let ParserMatchClass = b30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; } def b30_2Imm : Operand<OtherVT> { let ParserMatchClass = b30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
defm b30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>; def b30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
def u8_0ImmOperand : AsmOperandClass { let Name = "u8_0Imm"; let RenderMethod = "addImmOperands"; } def u8_0ImmOperand : AsmOperandClass { let Name = "u8_0Imm"; let RenderMethod = "addImmOperands"; }
def u8_0Imm : Operand<i32> { let ParserMatchClass = u8_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } def u8_0Imm : Operand<i32> { let ParserMatchClass = u8_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u8_0ImmPred : ImmOpPred<[{ return isShiftedUInt<8, 0>(N->getSExtValue());}]>; def u8_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<8, 0>(N->getSExtValue());}]>;
def u30_2ImmOperand : AsmOperandClass { let Name = "u30_2Imm"; let RenderMethod = "addImmOperands"; } def u30_2ImmOperand : AsmOperandClass { let Name = "u30_2Imm"; let RenderMethod = "addImmOperands"; }
def u30_2Imm : Operand<i32> { let ParserMatchClass = u30_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } def u30_2Imm : Operand<i32> { let ParserMatchClass = u30_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u30_2ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 2>(N->getSExtValue());}]>; def u30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 2>(N->getSExtValue());}]>;

@ -22,14 +22,14 @@ class T_RP_pat <InstHexagon MI, Intrinsic IntID>
def: Pat<(int_hexagon_A2_add IntRegs:$Rs, IntRegs:$Rt), def: Pat<(int_hexagon_A2_add IntRegs:$Rs, IntRegs:$Rt),
(A2_add IntRegs:$Rs, IntRegs:$Rt)>; (A2_add IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(int_hexagon_A2_addi IntRegs:$Rs, timm:$s16), def: Pat<(int_hexagon_A2_addi IntRegs:$Rs, imm:$s16),
(A2_addi IntRegs:$Rs, imm:$s16)>; (A2_addi IntRegs:$Rs, imm:$s16)>;
def: Pat<(int_hexagon_A2_addp DoubleRegs:$Rs, DoubleRegs:$Rt), def: Pat<(int_hexagon_A2_addp DoubleRegs:$Rs, DoubleRegs:$Rt),
(A2_addp DoubleRegs:$Rs, DoubleRegs:$Rt)>; (A2_addp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
def: Pat<(int_hexagon_A2_sub IntRegs:$Rs, IntRegs:$Rt), def: Pat<(int_hexagon_A2_sub IntRegs:$Rs, IntRegs:$Rt),
(A2_sub IntRegs:$Rs, IntRegs:$Rt)>; (A2_sub IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(int_hexagon_A2_subri timm:$s10, IntRegs:$Rs), def: Pat<(int_hexagon_A2_subri imm:$s10, IntRegs:$Rs),
(A2_subri imm:$s10, IntRegs:$Rs)>; (A2_subri imm:$s10, IntRegs:$Rs)>;
def: Pat<(int_hexagon_A2_subp DoubleRegs:$Rs, DoubleRegs:$Rt), def: Pat<(int_hexagon_A2_subp DoubleRegs:$Rs, DoubleRegs:$Rt),
(A2_subp DoubleRegs:$Rs, DoubleRegs:$Rt)>; (A2_subp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
@ -45,26 +45,26 @@ def: Pat<(int_hexagon_M2_dpmpyss_s0 IntRegs:$Rs, IntRegs:$Rt),
def: Pat<(int_hexagon_M2_dpmpyuu_s0 IntRegs:$Rs, IntRegs:$Rt), def: Pat<(int_hexagon_M2_dpmpyuu_s0 IntRegs:$Rs, IntRegs:$Rt),
(M2_dpmpyuu_s0 IntRegs:$Rs, IntRegs:$Rt)>; (M2_dpmpyuu_s0 IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$Rs, timm:$u5), def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$Rs, imm:$u5),
(S2_asl_i_r IntRegs:$Rs, imm:$u5)>; (S2_asl_i_r IntRegs:$Rs, imm:$u5)>;
def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$Rs, timm:$u5), def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$Rs, imm:$u5),
(S2_lsr_i_r IntRegs:$Rs, imm:$u5)>; (S2_lsr_i_r IntRegs:$Rs, imm:$u5)>;
def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$Rs, timm:$u5), def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$Rs, imm:$u5),
(S2_asr_i_r IntRegs:$Rs, imm:$u5)>; (S2_asr_i_r IntRegs:$Rs, imm:$u5)>;
def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$Rs, timm:$u6), def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$Rs, imm:$u6),
(S2_asl_i_p DoubleRegs:$Rs, imm:$u6)>; (S2_asl_i_p DoubleRegs:$Rs, imm:$u6)>;
def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$Rs, timm:$u6), def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$Rs, imm:$u6),
(S2_lsr_i_p DoubleRegs:$Rs, imm:$u6)>; (S2_lsr_i_p DoubleRegs:$Rs, imm:$u6)>;
def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$Rs, timm:$u6), def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$Rs, imm:$u6),
(S2_asr_i_p DoubleRegs:$Rs, imm:$u6)>; (S2_asr_i_p DoubleRegs:$Rs, imm:$u6)>;
def: Pat<(int_hexagon_A2_and IntRegs:$Rs, IntRegs:$Rt), def: Pat<(int_hexagon_A2_and IntRegs:$Rs, IntRegs:$Rt),
(A2_and IntRegs:$Rs, IntRegs:$Rt)>; (A2_and IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(int_hexagon_A2_andir IntRegs:$Rs, timm:$s10), def: Pat<(int_hexagon_A2_andir IntRegs:$Rs, imm:$s10),
(A2_andir IntRegs:$Rs, imm:$s10)>; (A2_andir IntRegs:$Rs, imm:$s10)>;
def: Pat<(int_hexagon_A2_or IntRegs:$Rs, IntRegs:$Rt), def: Pat<(int_hexagon_A2_or IntRegs:$Rs, IntRegs:$Rt),
(A2_or IntRegs:$Rs, IntRegs:$Rt)>; (A2_or IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(int_hexagon_A2_orir IntRegs:$Rs, timm:$s10), def: Pat<(int_hexagon_A2_orir IntRegs:$Rs, imm:$s10),
(A2_orir IntRegs:$Rs, imm:$s10)>; (A2_orir IntRegs:$Rs, imm:$s10)>;
def: Pat<(int_hexagon_A2_xor IntRegs:$Rs, IntRegs:$Rt), def: Pat<(int_hexagon_A2_xor IntRegs:$Rs, IntRegs:$Rt),
(A2_xor IntRegs:$Rs, IntRegs:$Rt)>; (A2_xor IntRegs:$Rs, IntRegs:$Rt)>;
@ -99,13 +99,13 @@ def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, (i32 0)),
(S2_vsathub I64:$Rs)>; (S2_vsathub I64:$Rs)>;
} }
def : Pat <(int_hexagon_S2_asr_i_r_rnd_goodsyntax I32:$Rs, u5_0ImmPred_timm:$imm), def : Pat <(int_hexagon_S2_asr_i_r_rnd_goodsyntax I32:$Rs, u5_0ImmPred:$imm),
(S2_asr_i_r_rnd I32:$Rs, (UDEC1 u5_0ImmPred:$imm))>; (S2_asr_i_r_rnd I32:$Rs, (UDEC1 u5_0ImmPred:$imm))>;
def : Pat <(int_hexagon_S2_asr_i_p_rnd_goodsyntax I64:$Rs, u6_0ImmPred_timm:$imm), def : Pat <(int_hexagon_S2_asr_i_p_rnd_goodsyntax I64:$Rs, u6_0ImmPred:$imm),
(S2_asr_i_p_rnd I64:$Rs, (UDEC1 u6_0ImmPred:$imm))>; (S2_asr_i_p_rnd I64:$Rs, (UDEC1 u6_0ImmPred:$imm))>;
def : Pat <(int_hexagon_S5_vasrhrnd_goodsyntax I64:$Rs, u4_0ImmPred_timm:$imm), def : Pat <(int_hexagon_S5_vasrhrnd_goodsyntax I64:$Rs, u4_0ImmPred:$imm),
(S5_vasrhrnd I64:$Rs, (UDEC1 u4_0ImmPred:$imm))>; (S5_vasrhrnd I64:$Rs, (UDEC1 u4_0ImmPred:$imm))>;
def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, u4_0ImmPred_timm:$imm), def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, u4_0ImmPred:$imm),
(S5_asrhub_rnd_sat I64:$Rs, (UDEC1 u4_0ImmPred:$imm))>; (S5_asrhub_rnd_sat I64:$Rs, (UDEC1 u4_0ImmPred:$imm))>;
def ImmExt64: SDNodeXForm<imm, [{ def ImmExt64: SDNodeXForm<imm, [{
@ -121,13 +121,13 @@ def ImmExt64: SDNodeXForm<imm, [{
// To connect the builtin with the instruction, the builtin's operand // To connect the builtin with the instruction, the builtin's operand
// needs to be extended to the right type. // needs to be extended to the right type.
def : Pat<(int_hexagon_A2_tfrpi timm:$Is), def : Pat<(int_hexagon_A2_tfrpi imm:$Is),
(A2_tfrpi (ImmExt64 $Is))>; (A2_tfrpi (ImmExt64 $Is))>;
def : Pat <(int_hexagon_C2_cmpgei I32:$src1, s32_0ImmPred_timm:$src2), def : Pat <(int_hexagon_C2_cmpgei I32:$src1, s32_0ImmPred:$src2),
(C2_tfrpr (C2_cmpgti I32:$src1, (SDEC1 s32_0ImmPred:$src2)))>; (C2_tfrpr (C2_cmpgti I32:$src1, (SDEC1 s32_0ImmPred:$src2)))>;
def : Pat <(int_hexagon_C2_cmpgeui I32:$src1, u32_0ImmPred_timm:$src2), def : Pat <(int_hexagon_C2_cmpgeui I32:$src1, u32_0ImmPred:$src2),
(C2_tfrpr (C2_cmpgtui I32:$src1, (UDEC1 u32_0ImmPred:$src2)))>; (C2_tfrpr (C2_cmpgtui I32:$src1, (UDEC1 u32_0ImmPred:$src2)))>;
def : Pat <(int_hexagon_C2_cmpgeui I32:$src, 0), def : Pat <(int_hexagon_C2_cmpgeui I32:$src, 0),
@ -142,7 +142,7 @@ def : Pat <(int_hexagon_C2_cmpltu I32:$src1, I32:$src2),
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
class S2op_tableidx_pat <Intrinsic IntID, InstHexagon OutputInst, class S2op_tableidx_pat <Intrinsic IntID, InstHexagon OutputInst,
SDNodeXForm XformImm> SDNodeXForm XformImm>
: Pat <(IntID I32:$src1, I32:$src2, u4_0ImmPred_timm:$src3, u5_0ImmPred_timm:$src4), : Pat <(IntID I32:$src1, I32:$src2, u4_0ImmPred:$src3, u5_0ImmPred:$src4),
(OutputInst I32:$src1, I32:$src2, u4_0ImmPred:$src3, (OutputInst I32:$src1, I32:$src2, u4_0ImmPred:$src3,
(XformImm u5_0ImmPred:$src4))>; (XformImm u5_0ImmPred:$src4))>;
@ -197,11 +197,11 @@ class T_stc_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Imm, PatLeaf Val>
: Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s), : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s),
(MI I32:$Rs, Imm:$s, I32:$Ru, Val:$Rt)>; (MI I32:$Rs, Imm:$s, I32:$Ru, Val:$Rt)>;
def: T_stc_pat<S2_storerb_pci, int_hexagon_circ_stb, s4_0ImmPred_timm, I32>; def: T_stc_pat<S2_storerb_pci, int_hexagon_circ_stb, s4_0ImmPred, I32>;
def: T_stc_pat<S2_storerh_pci, int_hexagon_circ_sth, s4_1ImmPred_timm, I32>; def: T_stc_pat<S2_storerh_pci, int_hexagon_circ_sth, s4_1ImmPred, I32>;
def: T_stc_pat<S2_storeri_pci, int_hexagon_circ_stw, s4_2ImmPred_timm, I32>; def: T_stc_pat<S2_storeri_pci, int_hexagon_circ_stw, s4_2ImmPred, I32>;
def: T_stc_pat<S2_storerd_pci, int_hexagon_circ_std, s4_3ImmPred_timm, I64>; def: T_stc_pat<S2_storerd_pci, int_hexagon_circ_std, s4_3ImmPred, I64>;
def: T_stc_pat<S2_storerf_pci, int_hexagon_circ_sthhi, s4_1ImmPred_timm, I32>; def: T_stc_pat<S2_storerf_pci, int_hexagon_circ_sthhi, s4_1ImmPred, I32>;
multiclass MaskedStore <InstHexagon MI, Intrinsic IntID> { multiclass MaskedStore <InstHexagon MI, Intrinsic IntID> {
def : Pat<(IntID HvxQR:$src1, IntRegs:$src2, HvxVR:$src3), def : Pat<(IntID HvxQR:$src1, IntRegs:$src2, HvxVR:$src3),

@ -360,7 +360,7 @@ class RDDSP_MM_DESC {
dag OutOperandList = (outs GPR32Opnd:$rt); dag OutOperandList = (outs GPR32Opnd:$rt);
dag InOperandList = (ins uimm7:$mask); dag InOperandList = (ins uimm7:$mask);
string AsmString = !strconcat("rddsp", "\t$rt, $mask"); string AsmString = !strconcat("rddsp", "\t$rt, $mask");
list<dag> Pattern = [(set GPR32Opnd:$rt, (int_mips_rddsp timmZExt7:$mask))]; list<dag> Pattern = [(set GPR32Opnd:$rt, (int_mips_rddsp immZExt7:$mask))];
InstrItinClass Itinerary = NoItinerary; InstrItinClass Itinerary = NoItinerary;
} }
@ -383,7 +383,7 @@ class WRDSP_MM_DESC {
dag OutOperandList = (outs); dag OutOperandList = (outs);
dag InOperandList = (ins GPR32Opnd:$rt, uimm7:$mask); dag InOperandList = (ins GPR32Opnd:$rt, uimm7:$mask);
string AsmString = !strconcat("wrdsp", "\t$rt, $mask"); string AsmString = !strconcat("wrdsp", "\t$rt, $mask");
list<dag> Pattern = [(int_mips_wrdsp GPR32Opnd:$rt, timmZExt7:$mask)]; list<dag> Pattern = [(int_mips_wrdsp GPR32Opnd:$rt, immZExt7:$mask)];
InstrItinClass Itinerary = NoItinerary; InstrItinClass Itinerary = NoItinerary;
bit isMoveReg = 1; bit isMoveReg = 1;
} }

@ -16,7 +16,6 @@
// shamt must fit in 6 bits. // shamt must fit in 6 bits.
def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>; def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>;
def timmZExt6 : TImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>;
// Node immediate fits as 10-bit sign extended on target immediate. // Node immediate fits as 10-bit sign extended on target immediate.
// e.g. seqi, snei // e.g. seqi, snei

@ -12,19 +12,12 @@
// ImmLeaf // ImmLeaf
def immZExt1 : ImmLeaf<i32, [{return isUInt<1>(Imm);}]>; def immZExt1 : ImmLeaf<i32, [{return isUInt<1>(Imm);}]>;
def timmZExt1 : ImmLeaf<i32, [{return isUInt<1>(Imm);}], NOOP_SDNodeXForm, timm>;
def immZExt2 : ImmLeaf<i32, [{return isUInt<2>(Imm);}]>; def immZExt2 : ImmLeaf<i32, [{return isUInt<2>(Imm);}]>;
def timmZExt2 : ImmLeaf<i32, [{return isUInt<2>(Imm);}], NOOP_SDNodeXForm, timm>;
def immZExt3 : ImmLeaf<i32, [{return isUInt<3>(Imm);}]>; def immZExt3 : ImmLeaf<i32, [{return isUInt<3>(Imm);}]>;
def timmZExt3 : ImmLeaf<i32, [{return isUInt<3>(Imm);}], NOOP_SDNodeXForm, timm>;
def immZExt4 : ImmLeaf<i32, [{return isUInt<4>(Imm);}]>; def immZExt4 : ImmLeaf<i32, [{return isUInt<4>(Imm);}]>;
def timmZExt4 : ImmLeaf<i32, [{return isUInt<4>(Imm);}], NOOP_SDNodeXForm, timm>;
def immZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}]>; def immZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}]>;
def timmZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}], NOOP_SDNodeXForm, timm>;
def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>; def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>;
def timmZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}], NOOP_SDNodeXForm, timm>;
def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>; def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>;
def timmSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}], NOOP_SDNodeXForm, timm>;
def immSExt10 : ImmLeaf<i32, [{return isInt<10>(Imm);}]>; def immSExt10 : ImmLeaf<i32, [{return isInt<10>(Imm);}]>;
// Mips-specific dsp nodes // Mips-specific dsp nodes
@ -313,7 +306,7 @@ class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
dag OutOperandList = (outs ROT:$rt); dag OutOperandList = (outs ROT:$rt);
dag InOperandList = (ins ROS:$rs, uimm5:$sa, ROS:$src); dag InOperandList = (ins ROS:$rs, uimm5:$sa, ROS:$src);
string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa"); string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa");
list<dag> Pattern = [(set ROT:$rt, (OpNode ROS:$src, ROS:$rs, timmZExt5:$sa))]; list<dag> Pattern = [(set ROT:$rt, (OpNode ROS:$src, ROS:$rs, immZExt5:$sa))];
InstrItinClass Itinerary = itin; InstrItinClass Itinerary = itin;
string Constraints = "$src = $rt"; string Constraints = "$src = $rt";
string BaseOpcode = instr_asm; string BaseOpcode = instr_asm;
@ -450,7 +443,7 @@ class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
dag OutOperandList = (outs GPR32Opnd:$rd); dag OutOperandList = (outs GPR32Opnd:$rd);
dag InOperandList = (ins uimm10:$mask); dag InOperandList = (ins uimm10:$mask);
string AsmString = !strconcat(instr_asm, "\t$rd, $mask"); string AsmString = !strconcat(instr_asm, "\t$rd, $mask");
list<dag> Pattern = [(set GPR32Opnd:$rd, (OpNode timmZExt10:$mask))]; list<dag> Pattern = [(set GPR32Opnd:$rd, (OpNode immZExt10:$mask))];
InstrItinClass Itinerary = itin; InstrItinClass Itinerary = itin;
string BaseOpcode = instr_asm; string BaseOpcode = instr_asm;
bit isMoveReg = 1; bit isMoveReg = 1;
@ -461,7 +454,7 @@ class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
dag OutOperandList = (outs); dag OutOperandList = (outs);
dag InOperandList = (ins GPR32Opnd:$rs, uimm10:$mask); dag InOperandList = (ins GPR32Opnd:$rs, uimm10:$mask);
string AsmString = !strconcat(instr_asm, "\t$rs, $mask"); string AsmString = !strconcat(instr_asm, "\t$rs, $mask");
list<dag> Pattern = [(OpNode GPR32Opnd:$rs, timmZExt10:$mask)]; list<dag> Pattern = [(OpNode GPR32Opnd:$rs, immZExt10:$mask)];
InstrItinClass Itinerary = itin; InstrItinClass Itinerary = itin;
string BaseOpcode = instr_asm; string BaseOpcode = instr_asm;
bit isMoveReg = 1; bit isMoveReg = 1;
@ -1103,14 +1096,14 @@ class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph,
NoItinerary, DSPROpnd>; NoItinerary, DSPROpnd>;
// Misc // Misc
class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, uimm5, timmZExt5, class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, uimm5, immZExt5,
NoItinerary>; NoItinerary>;
class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, uimm2, timmZExt2, class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, uimm2, immZExt2,
NoItinerary>; NoItinerary>;
class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, uimm5, class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, uimm5,
timmZExt5, NoItinerary>; immZExt5, NoItinerary>;
// Pseudos. // Pseudos.
def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32, def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32,

@ -1263,7 +1263,6 @@ def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>;
// Node immediate fits as 7-bit zero extended on target immediate. // Node immediate fits as 7-bit zero extended on target immediate.
def immZExt7 : PatLeaf<(imm), [{ return isUInt<7>(N->getZExtValue()); }]>; def immZExt7 : PatLeaf<(imm), [{ return isUInt<7>(N->getZExtValue()); }]>;
def timmZExt7 : PatLeaf<(timm), [{ return isUInt<7>(N->getZExtValue()); }]>;
// Node immediate fits as 16-bit zero extended on target immediate. // Node immediate fits as 16-bit zero extended on target immediate.
// The LO16 param means that only the lower 16 bits of the node // The LO16 param means that only the lower 16 bits of the node
@ -1296,7 +1295,6 @@ def immZExt32 : PatLeaf<(imm), [{ return isUInt<32>(N->getZExtValue()); }]>;
// shamt field must fit in 5 bits. // shamt field must fit in 5 bits.
def immZExt5 : ImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>; def immZExt5 : ImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>;
def timmZExt5 : TImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>;
def immZExt5Plus1 : PatLeaf<(imm), [{ def immZExt5Plus1 : PatLeaf<(imm), [{
return isUInt<5>(N->getZExtValue() - 1); return isUInt<5>(N->getZExtValue() - 1);

@ -60,11 +60,6 @@ def immZExt2Ptr : ImmLeaf<iPTR, [{return isUInt<2>(Imm);}]>;
def immZExt3Ptr : ImmLeaf<iPTR, [{return isUInt<3>(Imm);}]>; def immZExt3Ptr : ImmLeaf<iPTR, [{return isUInt<3>(Imm);}]>;
def immZExt4Ptr : ImmLeaf<iPTR, [{return isUInt<4>(Imm);}]>; def immZExt4Ptr : ImmLeaf<iPTR, [{return isUInt<4>(Imm);}]>;
def timmZExt1Ptr : TImmLeaf<iPTR, [{return isUInt<1>(Imm);}]>;
def timmZExt2Ptr : TImmLeaf<iPTR, [{return isUInt<2>(Imm);}]>;
def timmZExt3Ptr : TImmLeaf<iPTR, [{return isUInt<3>(Imm);}]>;
def timmZExt4Ptr : TImmLeaf<iPTR, [{return isUInt<4>(Imm);}]>;
// Operands // Operands
def immZExt2Lsa : ImmLeaf<i32, [{return isUInt<2>(Imm - 1);}]>; def immZExt2Lsa : ImmLeaf<i32, [{return isUInt<2>(Imm - 1);}]>;
@ -1275,7 +1270,7 @@ class MSA_I8_SHF_DESC_BASE<string instr_asm, RegisterOperand ROWD,
dag OutOperandList = (outs ROWD:$wd); dag OutOperandList = (outs ROWD:$wd);
dag InOperandList = (ins ROWS:$ws, uimm8:$u8); dag InOperandList = (ins ROWS:$ws, uimm8:$u8);
string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u8"); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u8");
list<dag> Pattern = [(set ROWD:$wd, (MipsSHF timmZExt8:$u8, ROWS:$ws))]; list<dag> Pattern = [(set ROWD:$wd, (MipsSHF immZExt8:$u8, ROWS:$ws))];
InstrItinClass Itinerary = itin; InstrItinClass Itinerary = itin;
} }
@ -2304,13 +2299,13 @@ class INSERT_FW_VIDX64_PSEUDO_DESC :
class INSERT_FD_VIDX64_PSEUDO_DESC : class INSERT_FD_VIDX64_PSEUDO_DESC :
MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v2f64, MSA128DOpnd, FGR64Opnd, GPR64Opnd>; MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v2f64, MSA128DOpnd, FGR64Opnd, GPR64Opnd>;
class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, uimm4, timmZExt4, class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, uimm4, immZExt4,
MSA128BOpnd>; MSA128BOpnd>;
class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, uimm3, timmZExt3, class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, uimm3, immZExt3,
MSA128HOpnd>; MSA128HOpnd>;
class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, uimm2, timmZExt2, class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, uimm2, immZExt2,
MSA128WOpnd>; MSA128WOpnd>;
class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, uimm1, timmZExt1, class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, uimm1, immZExt1,
MSA128DOpnd>; MSA128DOpnd>;
class LD_DESC_BASE<string instr_asm, SDPatternOperator OpNode, class LD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@ -2523,22 +2518,22 @@ class PCNT_W_DESC : MSA_2R_DESC_BASE<"pcnt.w", ctpop, MSA128WOpnd>;
class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", ctpop, MSA128DOpnd>; class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", ctpop, MSA128DOpnd>;
class SAT_S_B_DESC : MSA_BIT_X_DESC_BASE<"sat_s.b", int_mips_sat_s_b, uimm3, class SAT_S_B_DESC : MSA_BIT_X_DESC_BASE<"sat_s.b", int_mips_sat_s_b, uimm3,
timmZExt3, MSA128BOpnd>; immZExt3, MSA128BOpnd>;
class SAT_S_H_DESC : MSA_BIT_X_DESC_BASE<"sat_s.h", int_mips_sat_s_h, uimm4, class SAT_S_H_DESC : MSA_BIT_X_DESC_BASE<"sat_s.h", int_mips_sat_s_h, uimm4,
timmZExt4, MSA128HOpnd>; immZExt4, MSA128HOpnd>;
class SAT_S_W_DESC : MSA_BIT_X_DESC_BASE<"sat_s.w", int_mips_sat_s_w, uimm5, class SAT_S_W_DESC : MSA_BIT_X_DESC_BASE<"sat_s.w", int_mips_sat_s_w, uimm5,
timmZExt5, MSA128WOpnd>; immZExt5, MSA128WOpnd>;
class SAT_S_D_DESC : MSA_BIT_X_DESC_BASE<"sat_s.d", int_mips_sat_s_d, uimm6, class SAT_S_D_DESC : MSA_BIT_X_DESC_BASE<"sat_s.d", int_mips_sat_s_d, uimm6,
timmZExt6, MSA128DOpnd>; immZExt6, MSA128DOpnd>;
class SAT_U_B_DESC : MSA_BIT_X_DESC_BASE<"sat_u.b", int_mips_sat_u_b, uimm3, class SAT_U_B_DESC : MSA_BIT_X_DESC_BASE<"sat_u.b", int_mips_sat_u_b, uimm3,
timmZExt3, MSA128BOpnd>; immZExt3, MSA128BOpnd>;
class SAT_U_H_DESC : MSA_BIT_X_DESC_BASE<"sat_u.h", int_mips_sat_u_h, uimm4, class SAT_U_H_DESC : MSA_BIT_X_DESC_BASE<"sat_u.h", int_mips_sat_u_h, uimm4,
timmZExt4, MSA128HOpnd>; immZExt4, MSA128HOpnd>;
class SAT_U_W_DESC : MSA_BIT_X_DESC_BASE<"sat_u.w", int_mips_sat_u_w, uimm5, class SAT_U_W_DESC : MSA_BIT_X_DESC_BASE<"sat_u.w", int_mips_sat_u_w, uimm5,
timmZExt5, MSA128WOpnd>; immZExt5, MSA128WOpnd>;
class SAT_U_D_DESC : MSA_BIT_X_DESC_BASE<"sat_u.d", int_mips_sat_u_d, uimm6, class SAT_U_D_DESC : MSA_BIT_X_DESC_BASE<"sat_u.d", int_mips_sat_u_d, uimm6,
timmZExt6, MSA128DOpnd>; immZExt6, MSA128DOpnd>;
class SHF_B_DESC : MSA_I8_SHF_DESC_BASE<"shf.b", MSA128BOpnd>; class SHF_B_DESC : MSA_I8_SHF_DESC_BASE<"shf.b", MSA128BOpnd>;
class SHF_H_DESC : MSA_I8_SHF_DESC_BASE<"shf.h", MSA128HOpnd>; class SHF_H_DESC : MSA_I8_SHF_DESC_BASE<"shf.h", MSA128HOpnd>;
@ -2551,16 +2546,16 @@ class SLD_D_DESC : MSA_3R_SLD_DESC_BASE<"sld.d", int_mips_sld_d, MSA128DOpnd>;
class SLDI_B_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.b", int_mips_sldi_b, class SLDI_B_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.b", int_mips_sldi_b,
MSA128BOpnd, MSA128BOpnd, uimm4, MSA128BOpnd, MSA128BOpnd, uimm4,
timmZExt4>; immZExt4>;
class SLDI_H_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.h", int_mips_sldi_h, class SLDI_H_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.h", int_mips_sldi_h,
MSA128HOpnd, MSA128HOpnd, uimm3, MSA128HOpnd, MSA128HOpnd, uimm3,
timmZExt3>; immZExt3>;
class SLDI_W_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.w", int_mips_sldi_w, class SLDI_W_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.w", int_mips_sldi_w,
MSA128WOpnd, MSA128WOpnd, uimm2, MSA128WOpnd, MSA128WOpnd, uimm2,
timmZExt2>; immZExt2>;
class SLDI_D_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.d", int_mips_sldi_d, class SLDI_D_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.d", int_mips_sldi_d,
MSA128DOpnd, MSA128DOpnd, uimm1, MSA128DOpnd, MSA128DOpnd, uimm1,
timmZExt1>; immZExt1>;
class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", shl, MSA128BOpnd>; class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", shl, MSA128BOpnd>;
class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", shl, MSA128HOpnd>; class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", shl, MSA128HOpnd>;
@ -2614,13 +2609,13 @@ class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, MSA128WOpnd>;
class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, MSA128DOpnd>; class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, MSA128DOpnd>;
class SRARI_B_DESC : MSA_BIT_X_DESC_BASE<"srari.b", int_mips_srari_b, uimm3, class SRARI_B_DESC : MSA_BIT_X_DESC_BASE<"srari.b", int_mips_srari_b, uimm3,
timmZExt3, MSA128BOpnd>; immZExt3, MSA128BOpnd>;
class SRARI_H_DESC : MSA_BIT_X_DESC_BASE<"srari.h", int_mips_srari_h, uimm4, class SRARI_H_DESC : MSA_BIT_X_DESC_BASE<"srari.h", int_mips_srari_h, uimm4,
timmZExt4, MSA128HOpnd>; immZExt4, MSA128HOpnd>;
class SRARI_W_DESC : MSA_BIT_X_DESC_BASE<"srari.w", int_mips_srari_w, uimm5, class SRARI_W_DESC : MSA_BIT_X_DESC_BASE<"srari.w", int_mips_srari_w, uimm5,
timmZExt5, MSA128WOpnd>; immZExt5, MSA128WOpnd>;
class SRARI_D_DESC : MSA_BIT_X_DESC_BASE<"srari.d", int_mips_srari_d, uimm6, class SRARI_D_DESC : MSA_BIT_X_DESC_BASE<"srari.d", int_mips_srari_d, uimm6,
timmZExt6, MSA128DOpnd>; immZExt6, MSA128DOpnd>;
class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", srl, MSA128BOpnd>; class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", srl, MSA128BOpnd>;
class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, MSA128HOpnd>; class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, MSA128HOpnd>;
@ -2642,13 +2637,13 @@ class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w, MSA128WOpnd>;
class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, MSA128DOpnd>; class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, MSA128DOpnd>;
class SRLRI_B_DESC : MSA_BIT_X_DESC_BASE<"srlri.b", int_mips_srlri_b, uimm3, class SRLRI_B_DESC : MSA_BIT_X_DESC_BASE<"srlri.b", int_mips_srlri_b, uimm3,
timmZExt3, MSA128BOpnd>; immZExt3, MSA128BOpnd>;
class SRLRI_H_DESC : MSA_BIT_X_DESC_BASE<"srlri.h", int_mips_srlri_h, uimm4, class SRLRI_H_DESC : MSA_BIT_X_DESC_BASE<"srlri.h", int_mips_srlri_h, uimm4,
timmZExt4, MSA128HOpnd>; immZExt4, MSA128HOpnd>;
class SRLRI_W_DESC : MSA_BIT_X_DESC_BASE<"srlri.w", int_mips_srlri_w, uimm5, class SRLRI_W_DESC : MSA_BIT_X_DESC_BASE<"srlri.w", int_mips_srlri_w, uimm5,
timmZExt5, MSA128WOpnd>; immZExt5, MSA128WOpnd>;
class SRLRI_D_DESC : MSA_BIT_X_DESC_BASE<"srlri.d", int_mips_srlri_d, uimm6, class SRLRI_D_DESC : MSA_BIT_X_DESC_BASE<"srlri.d", int_mips_srlri_d, uimm6,
timmZExt6, MSA128DOpnd>; immZExt6, MSA128DOpnd>;
class ST_DESC_BASE<string instr_asm, SDPatternOperator OpNode, class ST_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
ValueType TyNode, RegisterOperand ROWD, ValueType TyNode, RegisterOperand ROWD,

@ -2596,8 +2596,7 @@ static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
SDLoc DL(Op); SDLoc DL(Op);
return DAG.getNode(MipsISD::SHF, DL, ResTy, return DAG.getNode(MipsISD::SHF, DL, ResTy,
DAG.getTargetConstant(Imm, DL, MVT::i32), DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0));
Op->getOperand(0));
} }
/// Determine whether a range fits a regular pattern of values. /// Determine whether a range fits a regular pattern of values.

@ -331,7 +331,7 @@ class VXBX_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
class VXCR_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty> class VXCR_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
: VXForm_CR<xo, (outs vrrc:$vD), (ins vrrc:$vA, u1imm:$ST, u4imm:$SIX), : VXForm_CR<xo, (outs vrrc:$vD), (ins vrrc:$vA, u1imm:$ST, u4imm:$SIX),
!strconcat(opc, " $vD, $vA, $ST, $SIX"), IIC_VecFP, !strconcat(opc, " $vD, $vA, $ST, $SIX"), IIC_VecFP,
[(set Ty:$vD, (IntID Ty:$vA, timm:$ST, timm:$SIX))]>; [(set Ty:$vD, (IntID Ty:$vA, imm:$ST, imm:$SIX))]>;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// Instruction Definitions. // Instruction Definitions.
@ -401,10 +401,10 @@ let isCodeGenOnly = 1 in {
def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins), def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
"mfvscr $vD", IIC_LdStStore, "mfvscr $vD", IIC_LdStStore,
[(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>; [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB), def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
"mtvscr $vB", IIC_LdStLoad, "mtvscr $vB", IIC_LdStLoad,
[(int_ppc_altivec_mtvscr v4i32:$vB)]>; [(int_ppc_altivec_mtvscr v4i32:$vB)]>;
let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { // Loads. let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { // Loads.
def LVEBX: XForm_1_memOp<31, 7, (outs vrrc:$vD), (ins memrr:$src), def LVEBX: XForm_1_memOp<31, 7, (outs vrrc:$vD), (ins memrr:$src),

@ -2868,12 +2868,12 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB), (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
"xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP, "xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP,
[(set v4i32: $XT, [(set v4i32: $XT,
(int_ppc_vsx_xvtstdcsp v4f32:$XB, timm:$DCMX))]>; (int_ppc_vsx_xvtstdcsp v4f32:$XB, imm:$DCMX))]>;
def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5, def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5,
(outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB), (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
"xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP, "xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP,
[(set v2i64: $XT, [(set v2i64: $XT,
(int_ppc_vsx_xvtstdcdp v2f64:$XB, timm:$DCMX))]>; (int_ppc_vsx_xvtstdcdp v2f64:$XB, imm:$DCMX))]>;
//===--------------------------------------------------------------------===// //===--------------------------------------------------------------------===//

@ -214,12 +214,12 @@ class PseudoMaskedAMOUMinUMax
} }
class PseudoMaskedAMOPat<Intrinsic intrin, Pseudo AMOInst> class PseudoMaskedAMOPat<Intrinsic intrin, Pseudo AMOInst>
: Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering), : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering),
(AMOInst GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering)>; (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering)>;
class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst> class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst>
: Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
timm:$ordering), imm:$ordering),
(AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
imm:$ordering)>; imm:$ordering)>;
@ -288,7 +288,7 @@ def PseudoMaskedCmpXchg32
} }
def : Pat<(int_riscv_masked_cmpxchg_i32 def : Pat<(int_riscv_masked_cmpxchg_i32
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering),
(PseudoMaskedCmpXchg32 (PseudoMaskedCmpXchg32
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>; GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>;
@ -365,7 +365,7 @@ def PseudoCmpXchg64 : PseudoCmpXchg;
defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64>; defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64>;
def : Pat<(int_riscv_masked_cmpxchg_i64 def : Pat<(int_riscv_masked_cmpxchg_i64
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering),
(PseudoMaskedCmpXchg32 (PseudoMaskedCmpXchg32
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>; GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>;
} // Predicates = [HasStdExtA, IsRV64] } // Predicates = [HasStdExtA, IsRV64]

@ -1146,7 +1146,7 @@ void SystemZDAGToDAGISel::loadVectorConstant(
SDLoc DL(Node); SDLoc DL(Node);
SmallVector<SDValue, 2> Ops; SmallVector<SDValue, 2> Ops;
for (unsigned OpVal : VCI.OpVals) for (unsigned OpVal : VCI.OpVals)
Ops.push_back(CurDAG->getTargetConstant(OpVal, DL, MVT::i32)); Ops.push_back(CurDAG->getConstant(OpVal, DL, MVT::i32));
SDValue Op = CurDAG->getNode(VCI.Opcode, DL, VCI.VecVT, Ops); SDValue Op = CurDAG->getNode(VCI.Opcode, DL, VCI.VecVT, Ops);
if (VCI.VecVT == VT.getSimpleVT()) if (VCI.VecVT == VT.getSimpleVT())
@ -1550,8 +1550,8 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
uint64_t ConstCCMask = uint64_t ConstCCMask =
cast<ConstantSDNode>(CCMask.getNode())->getZExtValue(); cast<ConstantSDNode>(CCMask.getNode())->getZExtValue();
// Invert the condition. // Invert the condition.
CCMask = CurDAG->getTargetConstant(ConstCCValid ^ ConstCCMask, CCMask = CurDAG->getConstant(ConstCCValid ^ ConstCCMask, SDLoc(Node),
SDLoc(Node), CCMask.getValueType()); CCMask.getValueType());
SDValue Op4 = Node->getOperand(4); SDValue Op4 = Node->getOperand(4);
SDNode *UpdatedNode = SDNode *UpdatedNode =
CurDAG->UpdateNodeOperands(Node, Op1, Op0, CCValid, CCMask, Op4); CurDAG->UpdateNodeOperands(Node, Op1, Op0, CCValid, CCMask, Op4);
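These hunks switch the SystemZ selector back from getTargetConstant to getConstant. Both calls create a ConstantSDNode, but with different opcodes: a plain constant may be CSE'd, legalized or materialized in a register, while a target constant is expected to be folded directly into the selected instruction as an immediate operand. A minimal sketch of the two:

  #include "llvm/CodeGen/SelectionDAG.h"
  using namespace llvm;

  // Sketch: both calls below wrap the value 42 in a ConstantSDNode, but the
  // first yields ISD::Constant and the second ISD::TargetConstant.
  static void constantKinds(SelectionDAG &DAG, const SDLoc &DL) {
    SDValue C = DAG.getConstant(42, DL, MVT::i32);        // ISD::Constant
    SDValue TC = DAG.getTargetConstant(42, DL, MVT::i32); // ISD::TargetConstant
    (void)C;
    (void)TC;
  }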


@ -2549,12 +2549,12 @@ static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
} }
if (C.Opcode == SystemZISD::ICMP) if (C.Opcode == SystemZISD::ICMP)
return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1, return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
DAG.getTargetConstant(C.ICmpType, DL, MVT::i32)); DAG.getConstant(C.ICmpType, DL, MVT::i32));
if (C.Opcode == SystemZISD::TM) { if (C.Opcode == SystemZISD::TM) {
bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) != bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1)); bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1, return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
DAG.getTargetConstant(RegisterOnly, DL, MVT::i32)); DAG.getConstant(RegisterOnly, DL, MVT::i32));
} }
return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1); return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
} }
@ -2592,10 +2592,10 @@ static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// in CCValid, so other values can be ignored. // in CCValid, so other values can be ignored.
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
unsigned CCValid, unsigned CCMask) { unsigned CCValid, unsigned CCMask) {
SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32), SDValue Ops[] = { DAG.getConstant(1, DL, MVT::i32),
DAG.getConstant(0, DL, MVT::i32), DAG.getConstant(0, DL, MVT::i32),
DAG.getTargetConstant(CCValid, DL, MVT::i32), DAG.getConstant(CCValid, DL, MVT::i32),
DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg}; DAG.getConstant(CCMask, DL, MVT::i32), CCReg };
return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops); return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
} }
@ -2757,10 +2757,9 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
SDValue CCReg = emitCmp(DAG, DL, C); SDValue CCReg = emitCmp(DAG, DL, C);
return DAG.getNode( return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0), Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32),
DAG.getTargetConstant(C.CCValid, DL, MVT::i32), DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
} }
// Return true if Pos is CmpOp and Neg is the negative of CmpOp, // Return true if Pos is CmpOp and Neg is the negative of CmpOp,
@ -2811,9 +2810,8 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
} }
SDValue CCReg = emitCmp(DAG, DL, C); SDValue CCReg = emitCmp(DAG, DL, C);
SDValue Ops[] = {TrueOp, FalseOp, SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32),
DAG.getTargetConstant(C.CCValid, DL, MVT::i32), DAG.getConstant(C.CCMask, DL, MVT::i32), CCReg};
DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops); return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
} }
@ -3900,8 +3898,11 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ; unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
auto *Node = cast<MemIntrinsicSDNode>(Op.getNode()); auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32), SDValue Ops[] = {
Op.getOperand(1)}; Op.getOperand(0),
DAG.getConstant(Code, DL, MVT::i32),
Op.getOperand(1)
};
return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL, return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
Node->getVTList(), Ops, Node->getVTList(), Ops,
Node->getMemoryVT(), Node->getMemOperand()); Node->getMemoryVT(), Node->getMemOperand());
@ -4243,7 +4244,7 @@ static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1); Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
SDValue Op; SDValue Op;
if (P.Opcode == SystemZISD::PERMUTE_DWORDS) { if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32); SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32);
Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2); Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
} else if (P.Opcode == SystemZISD::PACK) { } else if (P.Opcode == SystemZISD::PACK) {
MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8), MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
@ -4268,8 +4269,7 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
unsigned StartIndex, OpNo0, OpNo1; unsigned StartIndex, OpNo0, OpNo1;
if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1)) if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0], return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
Ops[OpNo1], Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32));
DAG.getTargetConstant(StartIndex, DL, MVT::i32));
// Fall back on VPERM. Construct an SDNode for the permute vector. // Fall back on VPERM. Construct an SDNode for the permute vector.
SDValue IndexNodes[SystemZ::VectorBytes]; SDValue IndexNodes[SystemZ::VectorBytes];
@ -4767,7 +4767,7 @@ SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index)); return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
// Otherwise keep it as a vector-to-vector operation. // Otherwise keep it as a vector-to-vector operation.
return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0), return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
DAG.getTargetConstant(Index, DL, MVT::i32)); DAG.getConstant(Index, DL, MVT::i32));
} }
GeneralShuffle GS(VT); GeneralShuffle GS(VT);
@ -6057,8 +6057,8 @@ SDValue SystemZTargetLowering::combineBR_CCMASK(
if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0), return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
Chain, Chain,
DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32),
DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32),
N->getOperand(3), CCReg); N->getOperand(3), CCReg);
return SDValue(); return SDValue();
} }
@ -6079,9 +6079,10 @@ SDValue SystemZTargetLowering::combineSELECT_CCMASK(
if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
N->getOperand(0), N->getOperand(1), N->getOperand(0),
DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), N->getOperand(1),
DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32),
DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32),
CCReg); CCReg);
return SDValue(); return SDValue();
} }
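The hunks above move the CCValid/CCMask operands of the CC-consuming SystemZ nodes back to ordinary getConstant values. A minimal sketch of assembling such a node; the opcode is left as a parameter because the SystemZISD enum is private to the target, and the operand shape mirrors emitSETCC above:

  #include "llvm/CodeGen/SelectionDAG.h"
  using namespace llvm;

  // Sketch: build a select-on-condition-code style node whose CCValid/CCMask
  // operands are plain i32 constants.
  static SDValue buildSelectCCMask(SelectionDAG &DAG, const SDLoc &DL,
                                   unsigned Opcode, SDValue TrueOp,
                                   SDValue FalseOp, unsigned CCValid,
                                   unsigned CCMask, SDValue CCReg) {
    SDValue Ops[] = {TrueOp, FalseOp,
                     DAG.getConstant(CCValid, DL, MVT::i32),
                     DAG.getConstant(CCMask, DL, MVT::i32), CCReg};
    return DAG.getNode(Opcode, DL, TrueOp.getValueType(), Ops);
  }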


@ -2141,17 +2141,17 @@ class FixedCondBranchRXY<CondVariant V, string mnemonic, bits<16> opcode,
} }
class CmpBranchRIEa<string mnemonic, bits<16> opcode, class CmpBranchRIEa<string mnemonic, bits<16> opcode,
RegisterOperand cls, ImmOpWithPattern imm> RegisterOperand cls, Immediate imm>
: InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2, cond4:$M3), : InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2, cond4:$M3),
mnemonic#"$M3\t$R1, $I2", []>; mnemonic#"$M3\t$R1, $I2", []>;
class AsmCmpBranchRIEa<string mnemonic, bits<16> opcode, class AsmCmpBranchRIEa<string mnemonic, bits<16> opcode,
RegisterOperand cls, ImmOpWithPattern imm> RegisterOperand cls, Immediate imm>
: InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2, imm32zx4:$M3), : InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2, imm32zx4:$M3),
mnemonic#"\t$R1, $I2, $M3", []>; mnemonic#"\t$R1, $I2, $M3", []>;
class FixedCmpBranchRIEa<CondVariant V, string mnemonic, bits<16> opcode, class FixedCmpBranchRIEa<CondVariant V, string mnemonic, bits<16> opcode,
RegisterOperand cls, ImmOpWithPattern imm> RegisterOperand cls, Immediate imm>
: InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2), : InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2),
mnemonic#V.suffix#"\t$R1, $I2", []> { mnemonic#V.suffix#"\t$R1, $I2", []> {
let isAsmParserOnly = V.alternate; let isAsmParserOnly = V.alternate;
@ -2159,7 +2159,7 @@ class FixedCmpBranchRIEa<CondVariant V, string mnemonic, bits<16> opcode,
} }
multiclass CmpBranchRIEaPair<string mnemonic, bits<16> opcode, multiclass CmpBranchRIEaPair<string mnemonic, bits<16> opcode,
RegisterOperand cls, ImmOpWithPattern imm> { RegisterOperand cls, Immediate imm> {
let isCodeGenOnly = 1 in let isCodeGenOnly = 1 in
def "" : CmpBranchRIEa<mnemonic, opcode, cls, imm>; def "" : CmpBranchRIEa<mnemonic, opcode, cls, imm>;
def Asm : AsmCmpBranchRIEa<mnemonic, opcode, cls, imm>; def Asm : AsmCmpBranchRIEa<mnemonic, opcode, cls, imm>;
@ -2193,19 +2193,19 @@ multiclass CmpBranchRIEbPair<string mnemonic, bits<16> opcode,
} }
class CmpBranchRIEc<string mnemonic, bits<16> opcode, class CmpBranchRIEc<string mnemonic, bits<16> opcode,
RegisterOperand cls, ImmOpWithPattern imm> RegisterOperand cls, Immediate imm>
: InstRIEc<opcode, (outs), : InstRIEc<opcode, (outs),
(ins cls:$R1, imm:$I2, cond4:$M3, brtarget16:$RI4), (ins cls:$R1, imm:$I2, cond4:$M3, brtarget16:$RI4),
mnemonic#"$M3\t$R1, $I2, $RI4", []>; mnemonic#"$M3\t$R1, $I2, $RI4", []>;
class AsmCmpBranchRIEc<string mnemonic, bits<16> opcode, class AsmCmpBranchRIEc<string mnemonic, bits<16> opcode,
RegisterOperand cls, ImmOpWithPattern imm> RegisterOperand cls, Immediate imm>
: InstRIEc<opcode, (outs), : InstRIEc<opcode, (outs),
(ins cls:$R1, imm:$I2, imm32zx4:$M3, brtarget16:$RI4), (ins cls:$R1, imm:$I2, imm32zx4:$M3, brtarget16:$RI4),
mnemonic#"\t$R1, $I2, $M3, $RI4", []>; mnemonic#"\t$R1, $I2, $M3, $RI4", []>;
class FixedCmpBranchRIEc<CondVariant V, string mnemonic, bits<16> opcode, class FixedCmpBranchRIEc<CondVariant V, string mnemonic, bits<16> opcode,
RegisterOperand cls, ImmOpWithPattern imm> RegisterOperand cls, Immediate imm>
: InstRIEc<opcode, (outs), (ins cls:$R1, imm:$I2, brtarget16:$RI4), : InstRIEc<opcode, (outs), (ins cls:$R1, imm:$I2, brtarget16:$RI4),
mnemonic#V.suffix#"\t$R1, $I2, $RI4", []> { mnemonic#V.suffix#"\t$R1, $I2, $RI4", []> {
let isAsmParserOnly = V.alternate; let isAsmParserOnly = V.alternate;
@ -2213,7 +2213,7 @@ class FixedCmpBranchRIEc<CondVariant V, string mnemonic, bits<16> opcode,
} }
multiclass CmpBranchRIEcPair<string mnemonic, bits<16> opcode, multiclass CmpBranchRIEcPair<string mnemonic, bits<16> opcode,
RegisterOperand cls, ImmOpWithPattern imm> { RegisterOperand cls, Immediate imm> {
let isCodeGenOnly = 1 in let isCodeGenOnly = 1 in
def "" : CmpBranchRIEc<mnemonic, opcode, cls, imm>; def "" : CmpBranchRIEc<mnemonic, opcode, cls, imm>;
def Asm : AsmCmpBranchRIEc<mnemonic, opcode, cls, imm>; def Asm : AsmCmpBranchRIEc<mnemonic, opcode, cls, imm>;
@ -2272,19 +2272,19 @@ multiclass CmpBranchRRSPair<string mnemonic, bits<16> opcode,
} }
class CmpBranchRIS<string mnemonic, bits<16> opcode, class CmpBranchRIS<string mnemonic, bits<16> opcode,
RegisterOperand cls, ImmOpWithPattern imm> RegisterOperand cls, Immediate imm>
: InstRIS<opcode, (outs), : InstRIS<opcode, (outs),
(ins cls:$R1, imm:$I2, cond4:$M3, bdaddr12only:$BD4), (ins cls:$R1, imm:$I2, cond4:$M3, bdaddr12only:$BD4),
mnemonic#"$M3\t$R1, $I2, $BD4", []>; mnemonic#"$M3\t$R1, $I2, $BD4", []>;
class AsmCmpBranchRIS<string mnemonic, bits<16> opcode, class AsmCmpBranchRIS<string mnemonic, bits<16> opcode,
RegisterOperand cls, ImmOpWithPattern imm> RegisterOperand cls, Immediate imm>
: InstRIS<opcode, (outs), : InstRIS<opcode, (outs),
(ins cls:$R1, imm:$I2, imm32zx4:$M3, bdaddr12only:$BD4), (ins cls:$R1, imm:$I2, imm32zx4:$M3, bdaddr12only:$BD4),
mnemonic#"\t$R1, $I2, $M3, $BD4", []>; mnemonic#"\t$R1, $I2, $M3, $BD4", []>;
class FixedCmpBranchRIS<CondVariant V, string mnemonic, bits<16> opcode, class FixedCmpBranchRIS<CondVariant V, string mnemonic, bits<16> opcode,
RegisterOperand cls, ImmOpWithPattern imm> RegisterOperand cls, Immediate imm>
: InstRIS<opcode, (outs), (ins cls:$R1, imm:$I2, bdaddr12only:$BD4), : InstRIS<opcode, (outs), (ins cls:$R1, imm:$I2, bdaddr12only:$BD4),
mnemonic#V.suffix#"\t$R1, $I2, $BD4", []> { mnemonic#V.suffix#"\t$R1, $I2, $BD4", []> {
let isAsmParserOnly = V.alternate; let isAsmParserOnly = V.alternate;
@ -2292,7 +2292,7 @@ class FixedCmpBranchRIS<CondVariant V, string mnemonic, bits<16> opcode,
} }
multiclass CmpBranchRISPair<string mnemonic, bits<16> opcode, multiclass CmpBranchRISPair<string mnemonic, bits<16> opcode,
RegisterOperand cls, ImmOpWithPattern imm> { RegisterOperand cls, Immediate imm> {
let isCodeGenOnly = 1 in let isCodeGenOnly = 1 in
def "" : CmpBranchRIS<mnemonic, opcode, cls, imm>; def "" : CmpBranchRIS<mnemonic, opcode, cls, imm>;
def Asm : AsmCmpBranchRIS<mnemonic, opcode, cls, imm>; def Asm : AsmCmpBranchRIS<mnemonic, opcode, cls, imm>;
@ -2585,7 +2585,7 @@ multiclass StoreMultipleVRSaAlign<string mnemonic, bits<16> opcode> {
// We therefore match the address in the same way as a normal store and // We therefore match the address in the same way as a normal store and
// only use the StoreSI* instruction if the matched address is suitable. // only use the StoreSI* instruction if the matched address is suitable.
class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator, class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
ImmOpWithPattern imm> Immediate imm>
: InstSI<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2), : InstSI<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2", mnemonic#"\t$BD1, $I2",
[(operator imm:$I2, mviaddr12pair:$BD1)]> { [(operator imm:$I2, mviaddr12pair:$BD1)]> {
@ -2593,7 +2593,7 @@ class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
} }
class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
ImmOpWithPattern imm> Immediate imm>
: InstSIY<opcode, (outs), (ins mviaddr20pair:$BD1, imm:$I2), : InstSIY<opcode, (outs), (ins mviaddr20pair:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2", mnemonic#"\t$BD1, $I2",
[(operator imm:$I2, mviaddr20pair:$BD1)]> { [(operator imm:$I2, mviaddr20pair:$BD1)]> {
@ -2601,7 +2601,7 @@ class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
} }
class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator, class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
ImmOpWithPattern imm> Immediate imm>
: InstSIL<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2), : InstSIL<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2", mnemonic#"\t$BD1, $I2",
[(operator imm:$I2, mviaddr12pair:$BD1)]> { [(operator imm:$I2, mviaddr12pair:$BD1)]> {
@ -2609,7 +2609,7 @@ class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
} }
multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
SDPatternOperator operator, ImmOpWithPattern imm> { SDPatternOperator operator, Immediate imm> {
let DispKey = mnemonic in { let DispKey = mnemonic in {
let DispSize = "12" in let DispSize = "12" in
def "" : StoreSI<mnemonic, siOpcode, operator, imm>; def "" : StoreSI<mnemonic, siOpcode, operator, imm>;
@ -2665,7 +2665,7 @@ multiclass CondStoreRSYPair<string mnemonic, bits<16> opcode,
def Asm : AsmCondStoreRSY<mnemonic, opcode, cls, bytes, mode>; def Asm : AsmCondStoreRSY<mnemonic, opcode, cls, bytes, mode>;
} }
class SideEffectUnaryI<string mnemonic, bits<8> opcode, ImmOpWithPattern imm> class SideEffectUnaryI<string mnemonic, bits<8> opcode, Immediate imm>
: InstI<opcode, (outs), (ins imm:$I1), : InstI<opcode, (outs), (ins imm:$I1),
mnemonic#"\t$I1", []>; mnemonic#"\t$I1", []>;
@ -2761,13 +2761,13 @@ class UnaryMemRRFc<string mnemonic, bits<16> opcode,
} }
class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, ImmOpWithPattern imm> RegisterOperand cls, Immediate imm>
: InstRIa<opcode, (outs cls:$R1), (ins imm:$I2), : InstRIa<opcode, (outs cls:$R1), (ins imm:$I2),
mnemonic#"\t$R1, $I2", mnemonic#"\t$R1, $I2",
[(set cls:$R1, (operator imm:$I2))]>; [(set cls:$R1, (operator imm:$I2))]>;
class UnaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, class UnaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, ImmOpWithPattern imm> RegisterOperand cls, Immediate imm>
: InstRILa<opcode, (outs cls:$R1), (ins imm:$I2), : InstRILa<opcode, (outs cls:$R1), (ins imm:$I2),
mnemonic#"\t$R1, $I2", mnemonic#"\t$R1, $I2",
[(set cls:$R1, (operator imm:$I2))]>; [(set cls:$R1, (operator imm:$I2))]>;
@ -2885,14 +2885,14 @@ multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
} }
class UnaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator, class UnaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr, ImmOpWithPattern imm, bits<4> type = 0> TypedReg tr, Immediate imm, bits<4> type = 0>
: InstVRIa<opcode, (outs tr.op:$V1), (ins imm:$I2), : InstVRIa<opcode, (outs tr.op:$V1), (ins imm:$I2),
mnemonic#"\t$V1, $I2", mnemonic#"\t$V1, $I2",
[(set (tr.vt tr.op:$V1), (operator (i32 timm:$I2)))]> { [(set (tr.vt tr.op:$V1), (operator imm:$I2))]> {
let M3 = type; let M3 = type;
} }
class UnaryVRIaGeneric<string mnemonic, bits<16> opcode, ImmOpWithPattern imm> class UnaryVRIaGeneric<string mnemonic, bits<16> opcode, Immediate imm>
: InstVRIa<opcode, (outs VR128:$V1), (ins imm:$I2, imm32zx4:$M3), : InstVRIa<opcode, (outs VR128:$V1), (ins imm:$I2, imm32zx4:$M3),
mnemonic#"\t$V1, $I2, $M3", []>; mnemonic#"\t$V1, $I2, $M3", []>;
@ -3021,7 +3021,7 @@ class SideEffectBinaryRRFc<string mnemonic, bits<16> opcode,
} }
class SideEffectBinaryIE<string mnemonic, bits<16> opcode, class SideEffectBinaryIE<string mnemonic, bits<16> opcode,
ImmOpWithPattern imm1, ImmOpWithPattern imm2> Immediate imm1, Immediate imm2>
: InstIE<opcode, (outs), (ins imm1:$I1, imm2:$I2), : InstIE<opcode, (outs), (ins imm1:$I1, imm2:$I2),
mnemonic#"\t$I1, $I2", []>; mnemonic#"\t$I1, $I2", []>;
@ -3030,7 +3030,7 @@ class SideEffectBinarySI<string mnemonic, bits<8> opcode, Operand imm>
mnemonic#"\t$BD1, $I2", []>; mnemonic#"\t$BD1, $I2", []>;
class SideEffectBinarySIL<string mnemonic, bits<16> opcode, class SideEffectBinarySIL<string mnemonic, bits<16> opcode,
SDPatternOperator operator, ImmOpWithPattern imm> SDPatternOperator operator, Immediate imm>
: InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2), : InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2", [(operator bdaddr12only:$BD1, imm:$I2)]>; mnemonic#"\t$BD1, $I2", [(operator bdaddr12only:$BD1, imm:$I2)]>;
@ -3165,7 +3165,7 @@ class BinaryRRFc<string mnemonic, bits<16> opcode,
mnemonic#"\t$R1, $R2, $M3", []>; mnemonic#"\t$R1, $R2, $M3", []>;
class BinaryMemRRFc<string mnemonic, bits<16> opcode, class BinaryMemRRFc<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2, ImmOpWithPattern imm> RegisterOperand cls1, RegisterOperand cls2, Immediate imm>
: InstRRFc<opcode, (outs cls2:$R2, cls1:$R1), (ins cls1:$R1src, imm:$M3), : InstRRFc<opcode, (outs cls2:$R2, cls1:$R1), (ins cls1:$R1src, imm:$M3),
mnemonic#"\t$R1, $R2, $M3", []> { mnemonic#"\t$R1, $R2, $M3", []> {
let Constraints = "$R1 = $R1src"; let Constraints = "$R1 = $R1src";
@ -3267,7 +3267,7 @@ multiclass CondBinaryRRFaPair<string mnemonic, bits<16> opcode,
} }
class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, ImmOpWithPattern imm> RegisterOperand cls, Immediate imm>
: InstRIa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), : InstRIa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
mnemonic#"\t$R1, $I2", mnemonic#"\t$R1, $I2",
[(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
@ -3276,14 +3276,14 @@ class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
} }
class BinaryRIE<string mnemonic, bits<16> opcode, SDPatternOperator operator, class BinaryRIE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls, ImmOpWithPattern imm> RegisterOperand cls, Immediate imm>
: InstRIEd<opcode, (outs cls:$R1), (ins cls:$R3, imm:$I2), : InstRIEd<opcode, (outs cls:$R1), (ins cls:$R3, imm:$I2),
mnemonic#"\t$R1, $R3, $I2", mnemonic#"\t$R1, $R3, $I2",
[(set cls:$R1, (operator cls:$R3, imm:$I2))]>; [(set cls:$R1, (operator cls:$R3, imm:$I2))]>;
multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2, multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2,
SDPatternOperator operator, RegisterOperand cls, SDPatternOperator operator, RegisterOperand cls,
ImmOpWithPattern imm> { Immediate imm> {
let NumOpsKey = mnemonic in { let NumOpsKey = mnemonic in {
let NumOpsValue = "3" in let NumOpsValue = "3" in
def K : BinaryRIE<mnemonic##"k", opcode2, operator, cls, imm>, def K : BinaryRIE<mnemonic##"k", opcode2, operator, cls, imm>,
@ -3294,7 +3294,7 @@ multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2,
} }
class CondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls, class CondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
ImmOpWithPattern imm> Immediate imm>
: InstRIEg<opcode, (outs cls:$R1), : InstRIEg<opcode, (outs cls:$R1),
(ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3), (ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3),
mnemonic#"$M3\t$R1, $I2", mnemonic#"$M3\t$R1, $I2",
@ -3308,7 +3308,7 @@ class CondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
// Like CondBinaryRIE, but used for the raw assembly form. The condition-code // Like CondBinaryRIE, but used for the raw assembly form. The condition-code
// mask is the third operand rather than being part of the mnemonic. // mask is the third operand rather than being part of the mnemonic.
class AsmCondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls, class AsmCondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
ImmOpWithPattern imm> Immediate imm>
: InstRIEg<opcode, (outs cls:$R1), : InstRIEg<opcode, (outs cls:$R1),
(ins cls:$R1src, imm:$I2, imm32zx4:$M3), (ins cls:$R1src, imm:$I2, imm32zx4:$M3),
mnemonic#"\t$R1, $I2, $M3", []> { mnemonic#"\t$R1, $I2, $M3", []> {
@ -3318,7 +3318,7 @@ class AsmCondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
// Like CondBinaryRIE, but with a fixed CC mask. // Like CondBinaryRIE, but with a fixed CC mask.
class FixedCondBinaryRIE<CondVariant V, string mnemonic, bits<16> opcode, class FixedCondBinaryRIE<CondVariant V, string mnemonic, bits<16> opcode,
RegisterOperand cls, ImmOpWithPattern imm> RegisterOperand cls, Immediate imm>
: InstRIEg<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), : InstRIEg<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
mnemonic#V.suffix#"\t$R1, $I2", []> { mnemonic#V.suffix#"\t$R1, $I2", []> {
let Constraints = "$R1 = $R1src"; let Constraints = "$R1 = $R1src";
@ -3328,14 +3328,14 @@ class FixedCondBinaryRIE<CondVariant V, string mnemonic, bits<16> opcode,
} }
multiclass CondBinaryRIEPair<string mnemonic, bits<16> opcode, multiclass CondBinaryRIEPair<string mnemonic, bits<16> opcode,
RegisterOperand cls, ImmOpWithPattern imm> { RegisterOperand cls, Immediate imm> {
let isCodeGenOnly = 1 in let isCodeGenOnly = 1 in
def "" : CondBinaryRIE<mnemonic, opcode, cls, imm>; def "" : CondBinaryRIE<mnemonic, opcode, cls, imm>;
def Asm : AsmCondBinaryRIE<mnemonic, opcode, cls, imm>; def Asm : AsmCondBinaryRIE<mnemonic, opcode, cls, imm>;
} }
class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, ImmOpWithPattern imm> RegisterOperand cls, Immediate imm>
: InstRILa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), : InstRILa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
mnemonic#"\t$R1, $I2", mnemonic#"\t$R1, $I2",
[(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
@ -3484,7 +3484,7 @@ class BinaryVRIb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr, bits<4> type> TypedReg tr, bits<4> type>
: InstVRIb<opcode, (outs tr.op:$V1), (ins imm32zx8:$I2, imm32zx8:$I3), : InstVRIb<opcode, (outs tr.op:$V1), (ins imm32zx8:$I2, imm32zx8:$I3),
mnemonic#"\t$V1, $I2, $I3", mnemonic#"\t$V1, $I2, $I3",
[(set (tr.vt tr.op:$V1), (operator imm32zx8_timm:$I2, imm32zx8_timm:$I3))]> { [(set (tr.vt tr.op:$V1), (operator imm32zx8:$I2, imm32zx8:$I3))]> {
let M4 = type; let M4 = type;
} }
@ -3498,7 +3498,7 @@ class BinaryVRIc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
: InstVRIc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V3, imm32zx16:$I2), : InstVRIc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V3, imm32zx16:$I2),
mnemonic#"\t$V1, $V3, $I2", mnemonic#"\t$V1, $V3, $I2",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V3), [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V3),
imm32zx16_timm:$I2))]> { imm32zx16:$I2))]> {
let M4 = type; let M4 = type;
} }
@ -3512,7 +3512,7 @@ class BinaryVRIe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
: InstVRIe<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, imm32zx12:$I3), : InstVRIe<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, imm32zx12:$I3),
mnemonic#"\t$V1, $V2, $I3", mnemonic#"\t$V1, $V2, $I3",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2),
imm32zx12_timm:$I3))]> { imm32zx12:$I3))]> {
let M4 = type; let M4 = type;
let M5 = m5; let M5 = m5;
} }
@ -3715,7 +3715,7 @@ class BinaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
: InstVRX<opcode, (outs VR128:$V1), (ins bdxaddr12only:$XBD2, imm32zx4:$M3), : InstVRX<opcode, (outs VR128:$V1), (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
mnemonic#"\t$V1, $XBD2, $M3", mnemonic#"\t$V1, $XBD2, $M3",
[(set (tr.vt tr.op:$V1), (operator bdxaddr12only:$XBD2, [(set (tr.vt tr.op:$V1), (operator bdxaddr12only:$XBD2,
imm32zx4_timm:$M3))]> { imm32zx4:$M3))]> {
let mayLoad = 1; let mayLoad = 1;
let AccessBytes = bytes; let AccessBytes = bytes;
} }
@ -3765,7 +3765,7 @@ class BinaryVSI<string mnemonic, bits<16> opcode, SDPatternOperator operator,
} }
class StoreBinaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes, class StoreBinaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
ImmOpWithPattern index> Immediate index>
: InstVRV<opcode, (outs), (ins VR128:$V1, bdvaddr12only:$VBD2, index:$M3), : InstVRV<opcode, (outs), (ins VR128:$V1, bdvaddr12only:$VBD2, index:$M3),
mnemonic#"\t$V1, $VBD2, $M3", []> { mnemonic#"\t$V1, $VBD2, $M3", []> {
let mayStore = 1; let mayStore = 1;
@ -3774,7 +3774,7 @@ class StoreBinaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
class StoreBinaryVRX<string mnemonic, bits<16> opcode, class StoreBinaryVRX<string mnemonic, bits<16> opcode,
SDPatternOperator operator, TypedReg tr, bits<5> bytes, SDPatternOperator operator, TypedReg tr, bits<5> bytes,
ImmOpWithPattern index> Immediate index>
: InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2, index:$M3), : InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2, index:$M3),
mnemonic#"\t$V1, $XBD2, $M3", mnemonic#"\t$V1, $XBD2, $M3",
[(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2, index:$M3)]> { [(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2, index:$M3)]> {
@ -3809,7 +3809,7 @@ class CompareRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
} }
class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, ImmOpWithPattern imm> RegisterOperand cls, Immediate imm>
: InstRIa<opcode, (outs), (ins cls:$R1, imm:$I2), : InstRIa<opcode, (outs), (ins cls:$R1, imm:$I2),
mnemonic#"\t$R1, $I2", mnemonic#"\t$R1, $I2",
[(set CC, (operator cls:$R1, imm:$I2))]> { [(set CC, (operator cls:$R1, imm:$I2))]> {
@ -3817,7 +3817,7 @@ class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
} }
class CompareRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, class CompareRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, ImmOpWithPattern imm> RegisterOperand cls, Immediate imm>
: InstRILa<opcode, (outs), (ins cls:$R1, imm:$I2), : InstRILa<opcode, (outs), (ins cls:$R1, imm:$I2),
mnemonic#"\t$R1, $I2", mnemonic#"\t$R1, $I2",
[(set CC, (operator cls:$R1, imm:$I2))]> { [(set CC, (operator cls:$R1, imm:$I2))]> {
@ -3924,7 +3924,7 @@ class CompareSSb<string mnemonic, bits<8> opcode>
} }
class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator, class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
SDPatternOperator load, ImmOpWithPattern imm, SDPatternOperator load, Immediate imm,
AddressingMode mode = bdaddr12only> AddressingMode mode = bdaddr12only>
: InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2), : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2", mnemonic#"\t$BD1, $I2",
@ -3934,7 +3934,7 @@ class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
} }
class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator, class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
SDPatternOperator load, ImmOpWithPattern imm> SDPatternOperator load, Immediate imm>
: InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2), : InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2", mnemonic#"\t$BD1, $I2",
[(set CC, (operator (load bdaddr12only:$BD1), imm:$I2))]> { [(set CC, (operator (load bdaddr12only:$BD1), imm:$I2))]> {
@ -3943,7 +3943,7 @@ class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
} }
class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
SDPatternOperator load, ImmOpWithPattern imm, SDPatternOperator load, Immediate imm,
AddressingMode mode = bdaddr20only> AddressingMode mode = bdaddr20only>
: InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2), : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2", mnemonic#"\t$BD1, $I2",
@ -3954,7 +3954,7 @@ class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
SDPatternOperator operator, SDPatternOperator load, SDPatternOperator operator, SDPatternOperator load,
ImmOpWithPattern imm> { Immediate imm> {
let DispKey = mnemonic in { let DispKey = mnemonic in {
let DispSize = "12" in let DispSize = "12" in
def "" : CompareSI<mnemonic, siOpcode, operator, load, imm, bdaddr12pair>; def "" : CompareSI<mnemonic, siOpcode, operator, load, imm, bdaddr12pair>;
@ -4012,7 +4012,7 @@ class TestRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
} }
class TestBinarySIL<string mnemonic, bits<16> opcode, class TestBinarySIL<string mnemonic, bits<16> opcode,
SDPatternOperator operator, ImmOpWithPattern imm> SDPatternOperator operator, Immediate imm>
: InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2), : InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2", mnemonic#"\t$BD1, $I2",
[(set CC, (operator bdaddr12only:$BD1, imm:$I2))]>; [(set CC, (operator bdaddr12only:$BD1, imm:$I2))]>;
@ -4073,7 +4073,7 @@ class SideEffectTernaryMemMemMemRRFb<string mnemonic, bits<16> opcode,
class SideEffectTernaryRRFc<string mnemonic, bits<16> opcode, class SideEffectTernaryRRFc<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2, RegisterOperand cls1, RegisterOperand cls2,
ImmOpWithPattern imm> Immediate imm>
: InstRRFc<opcode, (outs), (ins cls1:$R1, cls2:$R2, imm:$M3), : InstRRFc<opcode, (outs), (ins cls1:$R1, cls2:$R2, imm:$M3),
mnemonic#"\t$R1, $R2, $M3", []>; mnemonic#"\t$R1, $R2, $M3", []>;
@ -4086,7 +4086,7 @@ multiclass SideEffectTernaryRRFcOpt<string mnemonic, bits<16> opcode,
class SideEffectTernaryMemMemRRFc<string mnemonic, bits<16> opcode, class SideEffectTernaryMemMemRRFc<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2, RegisterOperand cls1, RegisterOperand cls2,
ImmOpWithPattern imm> Immediate imm>
: InstRRFc<opcode, (outs cls1:$R1, cls2:$R2), : InstRRFc<opcode, (outs cls1:$R1, cls2:$R2),
(ins cls1:$R1src, cls2:$R2src, imm:$M3), (ins cls1:$R1src, cls2:$R2src, imm:$M3),
mnemonic#"\t$R1, $R2, $M3", []> { mnemonic#"\t$R1, $R2, $M3", []> {
@ -4221,7 +4221,7 @@ class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
} }
class TernaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator, class TernaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, ImmOpWithPattern imm, ImmOpWithPattern index> TypedReg tr1, TypedReg tr2, Immediate imm, Immediate index>
: InstVRIa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V1src, imm:$I2, index:$M3), : InstVRIa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V1src, imm:$I2, index:$M3),
mnemonic#"\t$V1, $I2, $M3", mnemonic#"\t$V1, $I2, $M3",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V1src), [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V1src),
@ -4237,7 +4237,7 @@ class TernaryVRId<string mnemonic, bits<16> opcode, SDPatternOperator operator,
mnemonic#"\t$V1, $V2, $V3, $I4", mnemonic#"\t$V1, $V2, $V3, $I4",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2),
(tr2.vt tr2.op:$V3), (tr2.vt tr2.op:$V3),
imm32zx8_timm:$I4))]> { imm32zx8:$I4))]> {
let M5 = type; let M5 = type;
} }
@ -4252,8 +4252,8 @@ class TernaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
(ins tr2.op:$V2, imm32zx4:$M4, imm32zx4:$M5), (ins tr2.op:$V2, imm32zx4:$M4, imm32zx4:$M5),
mnemonic#"\t$V1, $V2, $M4, $M5", mnemonic#"\t$V1, $V2, $M4, $M5",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2),
imm32zx4_timm:$M4, imm32zx4:$M4,
imm32zx4_timm:$M5))], imm32zx4:$M5))],
m4or> { m4or> {
let M3 = type; let M3 = type;
} }
@ -4285,13 +4285,13 @@ multiclass TernaryOptVRRbSPair<string mnemonic, bits<16> opcode,
TypedReg tr1, TypedReg tr2, bits<4> type, TypedReg tr1, TypedReg tr2, bits<4> type,
bits<4> modifier = 0> { bits<4> modifier = 0> {
def "" : TernaryVRRb<mnemonic, opcode, operator, tr1, tr2, type, def "" : TernaryVRRb<mnemonic, opcode, operator, tr1, tr2, type,
imm32zx4even_timm, !and (modifier, 14)>; imm32zx4even, !and (modifier, 14)>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3", def : InstAlias<mnemonic#"\t$V1, $V2, $V3",
(!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2, (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, 0)>; tr2.op:$V3, 0)>;
let Defs = [CC] in let Defs = [CC] in
def S : TernaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, def S : TernaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
imm32zx4even_timm, !add(!and (modifier, 14), 1)>; imm32zx4even, !add(!and (modifier, 14), 1)>;
def : InstAlias<mnemonic#"s\t$V1, $V2, $V3", def : InstAlias<mnemonic#"s\t$V1, $V2, $V3",
(!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2, (!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, 0)>; tr2.op:$V3, 0)>;
@ -4314,7 +4314,7 @@ class TernaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
mnemonic#"\t$V1, $V2, $V3, $M4", mnemonic#"\t$V1, $V2, $V3, $M4",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2),
(tr2.vt tr2.op:$V3), (tr2.vt tr2.op:$V3),
imm32zx4_timm:$M4))]> { imm32zx4:$M4))]> {
let M5 = 0; let M5 = 0;
let M6 = 0; let M6 = 0;
} }
@ -4327,7 +4327,7 @@ class TernaryVRRcFloat<string mnemonic, bits<16> opcode,
mnemonic#"\t$V1, $V2, $V3, $M6", mnemonic#"\t$V1, $V2, $V3, $M6",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2),
(tr2.vt tr2.op:$V3), (tr2.vt tr2.op:$V3),
imm32zx4_timm:$M6))]> { imm32zx4:$M6))]> {
let M4 = type; let M4 = type;
let M5 = m5; let M5 = m5;
} }
@ -4429,7 +4429,7 @@ class TernaryVRSbGeneric<string mnemonic, bits<16> opcode>
} }
class TernaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes, class TernaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
ImmOpWithPattern index> Immediate index>
: InstVRV<opcode, (outs VR128:$V1), : InstVRV<opcode, (outs VR128:$V1),
(ins VR128:$V1src, bdvaddr12only:$VBD2, index:$M3), (ins VR128:$V1src, bdvaddr12only:$VBD2, index:$M3),
mnemonic#"\t$V1, $VBD2, $M3", []> { mnemonic#"\t$V1, $VBD2, $M3", []> {
@ -4440,7 +4440,7 @@ class TernaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
} }
class TernaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator, class TernaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<5> bytes, ImmOpWithPattern index> TypedReg tr1, TypedReg tr2, bits<5> bytes, Immediate index>
: InstVRX<opcode, (outs tr1.op:$V1), : InstVRX<opcode, (outs tr1.op:$V1),
(ins tr2.op:$V1src, bdxaddr12only:$XBD2, index:$M3), (ins tr2.op:$V1src, bdxaddr12only:$XBD2, index:$M3),
mnemonic#"\t$V1, $XBD2, $M3", mnemonic#"\t$V1, $XBD2, $M3",
@ -4461,7 +4461,7 @@ class QuaternaryVRId<string mnemonic, bits<16> opcode, SDPatternOperator operato
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V1src), [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V1src),
(tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V2),
(tr2.vt tr2.op:$V3), (tr2.vt tr2.op:$V3),
imm32zx8_timm:$I4))]> { imm32zx8:$I4))]> {
let Constraints = "$V1 = $V1src"; let Constraints = "$V1 = $V1src";
let DisableEncoding = "$V1src"; let DisableEncoding = "$V1src";
let M5 = type; let M5 = type;
@ -4480,7 +4480,7 @@ class QuaternaryVRIf<string mnemonic, bits<16> opcode>
: InstVRIf<opcode, (outs VR128:$V1), : InstVRIf<opcode, (outs VR128:$V1),
(ins VR128:$V2, VR128:$V3, (ins VR128:$V2, VR128:$V3,
imm32zx8:$I4, imm32zx4:$M5), imm32zx8:$I4, imm32zx4:$M5),
mnemonic#"\t$V1, $V2, $V3, $I4, $M5", []>; mnemonic#"\t$V1, $V2, $V3, $I4, $M5", []>;
class QuaternaryVRIg<string mnemonic, bits<16> opcode> class QuaternaryVRIg<string mnemonic, bits<16> opcode>
: InstVRIg<opcode, (outs VR128:$V1), : InstVRIg<opcode, (outs VR128:$V1),
@ -4491,7 +4491,7 @@ class QuaternaryVRIg<string mnemonic, bits<16> opcode>
class QuaternaryVRRd<string mnemonic, bits<16> opcode, class QuaternaryVRRd<string mnemonic, bits<16> opcode,
SDPatternOperator operator, TypedReg tr1, TypedReg tr2, SDPatternOperator operator, TypedReg tr1, TypedReg tr2,
TypedReg tr3, TypedReg tr4, bits<4> type, TypedReg tr3, TypedReg tr4, bits<4> type,
SDPatternOperator m6mask = imm32zx4_timm, bits<4> m6or = 0> SDPatternOperator m6mask = imm32zx4, bits<4> m6or = 0>
: InstVRRd<opcode, (outs tr1.op:$V1), : InstVRRd<opcode, (outs tr1.op:$V1),
(ins tr2.op:$V2, tr3.op:$V3, tr4.op:$V4, m6mask:$M6), (ins tr2.op:$V2, tr3.op:$V3, tr4.op:$V4, m6mask:$M6),
mnemonic#"\t$V1, $V2, $V3, $V4, $M6", mnemonic#"\t$V1, $V2, $V3, $V4, $M6",
@ -4518,14 +4518,14 @@ multiclass QuaternaryOptVRRdSPair<string mnemonic, bits<16> opcode,
bits<4> modifier = 0> { bits<4> modifier = 0> {
def "" : QuaternaryVRRd<mnemonic, opcode, operator, def "" : QuaternaryVRRd<mnemonic, opcode, operator,
tr1, tr2, tr2, tr2, type, tr1, tr2, tr2, tr2, type,
imm32zx4even_timm, !and (modifier, 14)>; imm32zx4even, !and (modifier, 14)>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4", def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4",
(!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2, (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, tr2.op:$V4, 0)>; tr2.op:$V3, tr2.op:$V4, 0)>;
let Defs = [CC] in let Defs = [CC] in
def S : QuaternaryVRRd<mnemonic##"s", opcode, operator_cc, def S : QuaternaryVRRd<mnemonic##"s", opcode, operator_cc,
tr1, tr2, tr2, tr2, type, tr1, tr2, tr2, tr2, type,
imm32zx4even_timm, !add (!and (modifier, 14), 1)>; imm32zx4even, !add (!and (modifier, 14), 1)>;
def : InstAlias<mnemonic#"s\t$V1, $V2, $V3, $V4", def : InstAlias<mnemonic#"s\t$V1, $V2, $V3, $V4",
(!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2, (!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, tr2.op:$V4, 0)>; tr2.op:$V3, tr2.op:$V4, 0)>;
@ -4536,7 +4536,7 @@ multiclass QuaternaryOptVRRdSPairGeneric<string mnemonic, bits<16> opcode> {
def "" : QuaternaryVRRdGeneric<mnemonic, opcode>; def "" : QuaternaryVRRdGeneric<mnemonic, opcode>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4, $M5", def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4, $M5",
(!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3, (!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
VR128:$V4, imm32zx4_timm:$M5, 0)>; VR128:$V4, imm32zx4:$M5, 0)>;
} }
class SideEffectQuaternaryRRFa<string mnemonic, bits<16> opcode, class SideEffectQuaternaryRRFa<string mnemonic, bits<16> opcode,
@ -4638,13 +4638,13 @@ class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1,
class PrefetchRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator> class PrefetchRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator>
: InstRXYb<opcode, (outs), (ins imm32zx4:$M1, bdxaddr20only:$XBD2), : InstRXYb<opcode, (outs), (ins imm32zx4:$M1, bdxaddr20only:$XBD2),
mnemonic##"\t$M1, $XBD2", mnemonic##"\t$M1, $XBD2",
[(operator imm32zx4_timm:$M1, bdxaddr20only:$XBD2)]>; [(operator imm32zx4:$M1, bdxaddr20only:$XBD2)]>;
class PrefetchRILPC<string mnemonic, bits<12> opcode, class PrefetchRILPC<string mnemonic, bits<12> opcode,
SDPatternOperator operator> SDPatternOperator operator>
: InstRILc<opcode, (outs), (ins imm32zx4_timm:$M1, pcrel32:$RI2), : InstRILc<opcode, (outs), (ins imm32zx4:$M1, pcrel32:$RI2),
mnemonic##"\t$M1, $RI2", mnemonic##"\t$M1, $RI2",
[(operator imm32zx4_timm:$M1, pcrel32:$RI2)]> { [(operator imm32zx4:$M1, pcrel32:$RI2)]> {
// We want PC-relative addresses to be tried ahead of BD and BDX addresses. // We want PC-relative addresses to be tried ahead of BD and BDX addresses.
// However, BDXs have two extra operands and are therefore 6 units more // However, BDXs have two extra operands and are therefore 6 units more
// complex. // complex.
@ -4691,7 +4691,7 @@ class Pseudo<dag outs, dag ins, list<dag> pattern>
// Like UnaryRI, but expanded after RA depending on the choice of register. // Like UnaryRI, but expanded after RA depending on the choice of register.
class UnaryRIPseudo<SDPatternOperator operator, RegisterOperand cls, class UnaryRIPseudo<SDPatternOperator operator, RegisterOperand cls,
ImmOpWithPattern imm> Immediate imm>
: Pseudo<(outs cls:$R1), (ins imm:$I2), : Pseudo<(outs cls:$R1), (ins imm:$I2),
[(set cls:$R1, (operator imm:$I2))]>; [(set cls:$R1, (operator imm:$I2))]>;
@ -4720,7 +4720,7 @@ class UnaryRRPseudo<string key, SDPatternOperator operator,
// Like BinaryRI, but expanded after RA depending on the choice of register. // Like BinaryRI, but expanded after RA depending on the choice of register.
class BinaryRIPseudo<SDPatternOperator operator, RegisterOperand cls, class BinaryRIPseudo<SDPatternOperator operator, RegisterOperand cls,
ImmOpWithPattern imm> Immediate imm>
: Pseudo<(outs cls:$R1), (ins cls:$R1src, imm:$I2), : Pseudo<(outs cls:$R1), (ins cls:$R1src, imm:$I2),
[(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
let Constraints = "$R1 = $R1src"; let Constraints = "$R1 = $R1src";
@ -4728,13 +4728,13 @@ class BinaryRIPseudo<SDPatternOperator operator, RegisterOperand cls,
// Like BinaryRIE, but expanded after RA depending on the choice of register. // Like BinaryRIE, but expanded after RA depending on the choice of register.
class BinaryRIEPseudo<SDPatternOperator operator, RegisterOperand cls, class BinaryRIEPseudo<SDPatternOperator operator, RegisterOperand cls,
ImmOpWithPattern imm> Immediate imm>
: Pseudo<(outs cls:$R1), (ins cls:$R3, imm:$I2), : Pseudo<(outs cls:$R1), (ins cls:$R3, imm:$I2),
[(set cls:$R1, (operator cls:$R3, imm:$I2))]>; [(set cls:$R1, (operator cls:$R3, imm:$I2))]>;
// Like BinaryRIAndK, but expanded after RA depending on the choice of register. // Like BinaryRIAndK, but expanded after RA depending on the choice of register.
multiclass BinaryRIAndKPseudo<string key, SDPatternOperator operator, multiclass BinaryRIAndKPseudo<string key, SDPatternOperator operator,
RegisterOperand cls, ImmOpWithPattern imm> { RegisterOperand cls, Immediate imm> {
let NumOpsKey = key in { let NumOpsKey = key in {
let NumOpsValue = "3" in let NumOpsValue = "3" in
def K : BinaryRIEPseudo<operator, cls, imm>, def K : BinaryRIEPseudo<operator, cls, imm>,
@ -4764,7 +4764,7 @@ class MemFoldPseudo<string mnemonic, RegisterOperand cls, bits<5> bytes,
// Like CompareRI, but expanded after RA depending on the choice of register. // Like CompareRI, but expanded after RA depending on the choice of register.
class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls, class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls,
ImmOpWithPattern imm> Immediate imm>
: Pseudo<(outs), (ins cls:$R1, imm:$I2), : Pseudo<(outs), (ins cls:$R1, imm:$I2),
[(set CC, (operator cls:$R1, imm:$I2))]> { [(set CC, (operator cls:$R1, imm:$I2))]> {
let isCompare = 1; let isCompare = 1;
@ -4783,7 +4783,7 @@ class CompareRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
} }
// Like TestBinarySIL, but expanded later. // Like TestBinarySIL, but expanded later.
class TestBinarySILPseudo<SDPatternOperator operator, ImmOpWithPattern imm> class TestBinarySILPseudo<SDPatternOperator operator, Immediate imm>
: Pseudo<(outs), (ins bdaddr12only:$BD1, imm:$I2), : Pseudo<(outs), (ins bdaddr12only:$BD1, imm:$I2),
[(set CC, (operator bdaddr12only:$BD1, imm:$I2))]>; [(set CC, (operator bdaddr12only:$BD1, imm:$I2))]>;
@ -4812,7 +4812,7 @@ class CondBinaryRRFaPseudo<RegisterOperand cls1, RegisterOperand cls2,
// Like CondBinaryRIE, but expanded after RA depending on the choice of // Like CondBinaryRIE, but expanded after RA depending on the choice of
// register. // register.
class CondBinaryRIEPseudo<RegisterOperand cls, ImmOpWithPattern imm> class CondBinaryRIEPseudo<RegisterOperand cls, Immediate imm>
: Pseudo<(outs cls:$R1), : Pseudo<(outs cls:$R1),
(ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3), (ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3),
[(set cls:$R1, (z_select_ccmask imm:$I2, cls:$R1src, [(set cls:$R1, (z_select_ccmask imm:$I2, cls:$R1src,
@ -4876,7 +4876,7 @@ class SelectWrapper<ValueType vt, RegisterOperand cls>
: Pseudo<(outs cls:$dst), : Pseudo<(outs cls:$dst),
(ins cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc), (ins cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc),
[(set (vt cls:$dst), (z_select_ccmask cls:$src1, cls:$src2, [(set (vt cls:$dst), (z_select_ccmask cls:$src1, cls:$src2,
imm32zx4_timm:$valid, imm32zx4_timm:$cc))]> { imm32zx4:$valid, imm32zx4:$cc))]> {
let usesCustomInserter = 1; let usesCustomInserter = 1;
let hasNoSchedulingInfo = 1; let hasNoSchedulingInfo = 1;
let Uses = [CC]; let Uses = [CC];
@ -4890,12 +4890,12 @@ multiclass CondStores<RegisterOperand cls, SDPatternOperator store,
def "" : Pseudo<(outs), def "" : Pseudo<(outs),
(ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc), (ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc),
[(store (z_select_ccmask cls:$new, (load mode:$addr), [(store (z_select_ccmask cls:$new, (load mode:$addr),
imm32zx4_timm:$valid, imm32zx4_timm:$cc), imm32zx4:$valid, imm32zx4:$cc),
mode:$addr)]>; mode:$addr)]>;
def Inv : Pseudo<(outs), def Inv : Pseudo<(outs),
(ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc), (ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc),
[(store (z_select_ccmask (load mode:$addr), cls:$new, [(store (z_select_ccmask (load mode:$addr), cls:$new,
imm32zx4_timm:$valid, imm32zx4_timm:$cc), imm32zx4:$valid, imm32zx4:$cc),
mode:$addr)]>; mode:$addr)]>;
} }
} }
@ -4917,11 +4917,11 @@ class AtomicLoadBinary<SDPatternOperator operator, RegisterOperand cls,
// Specializations of AtomicLoadWBinary. // Specializations of AtomicLoadWBinary.
class AtomicLoadBinaryReg32<SDPatternOperator operator> class AtomicLoadBinaryReg32<SDPatternOperator operator>
: AtomicLoadBinary<operator, GR32, (i32 GR32:$src2), GR32>; : AtomicLoadBinary<operator, GR32, (i32 GR32:$src2), GR32>;
class AtomicLoadBinaryImm32<SDPatternOperator operator, ImmOpWithPattern imm> class AtomicLoadBinaryImm32<SDPatternOperator operator, Immediate imm>
: AtomicLoadBinary<operator, GR32, (i32 imm:$src2), imm>; : AtomicLoadBinary<operator, GR32, (i32 imm:$src2), imm>;
class AtomicLoadBinaryReg64<SDPatternOperator operator> class AtomicLoadBinaryReg64<SDPatternOperator operator>
: AtomicLoadBinary<operator, GR64, (i64 GR64:$src2), GR64>; : AtomicLoadBinary<operator, GR64, (i64 GR64:$src2), GR64>;
class AtomicLoadBinaryImm64<SDPatternOperator operator, ImmOpWithPattern imm> class AtomicLoadBinaryImm64<SDPatternOperator operator, Immediate imm>
: AtomicLoadBinary<operator, GR64, (i64 imm:$src2), imm>; : AtomicLoadBinary<operator, GR64, (i64 imm:$src2), imm>;
// OPERATOR is ATOMIC_SWAPW or an ATOMIC_LOADW_* operation. PAT and OPERAND // OPERATOR is ATOMIC_SWAPW or an ATOMIC_LOADW_* operation. PAT and OPERAND
@ -4944,7 +4944,7 @@ class AtomicLoadWBinary<SDPatternOperator operator, dag pat,
// Specializations of AtomicLoadWBinary. // Specializations of AtomicLoadWBinary.
class AtomicLoadWBinaryReg<SDPatternOperator operator> class AtomicLoadWBinaryReg<SDPatternOperator operator>
: AtomicLoadWBinary<operator, (i32 GR32:$src2), GR32>; : AtomicLoadWBinary<operator, (i32 GR32:$src2), GR32>;
class AtomicLoadWBinaryImm<SDPatternOperator operator, ImmOpWithPattern imm> class AtomicLoadWBinaryImm<SDPatternOperator operator, Immediate imm>
: AtomicLoadWBinary<operator, (i32 imm:$src2), imm>; : AtomicLoadWBinary<operator, (i32 imm:$src2), imm>;
// A pseudo instruction that is a direct alias of a real instruction. // A pseudo instruction that is a direct alias of a real instruction.
@ -4979,7 +4979,7 @@ class StoreAliasVRX<SDPatternOperator operator, TypedReg tr,
// An alias of a BinaryRI, but with different register sizes. // An alias of a BinaryRI, but with different register sizes.
class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls, class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls,
ImmOpWithPattern imm> Immediate imm>
: Alias<4, (outs cls:$R1), (ins cls:$R1src, imm:$I2), : Alias<4, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
[(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
let Constraints = "$R1 = $R1src"; let Constraints = "$R1 = $R1src";
@ -4987,7 +4987,7 @@ class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls,
// An alias of a BinaryRIL, but with different register sizes. // An alias of a BinaryRIL, but with different register sizes.
class BinaryAliasRIL<SDPatternOperator operator, RegisterOperand cls, class BinaryAliasRIL<SDPatternOperator operator, RegisterOperand cls,
ImmOpWithPattern imm> Immediate imm>
: Alias<6, (outs cls:$R1), (ins cls:$R1src, imm:$I2), : Alias<6, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
[(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
let Constraints = "$R1 = $R1src"; let Constraints = "$R1 = $R1src";
@ -4999,7 +4999,7 @@ class BinaryAliasVRRf<RegisterOperand cls>
// An alias of a CompareRI, but with different register sizes. // An alias of a CompareRI, but with different register sizes.
class CompareAliasRI<SDPatternOperator operator, RegisterOperand cls, class CompareAliasRI<SDPatternOperator operator, RegisterOperand cls,
ImmOpWithPattern imm> Immediate imm>
: Alias<4, (outs), (ins cls:$R1, imm:$I2), : Alias<4, (outs), (ins cls:$R1, imm:$I2),
[(set CC, (operator cls:$R1, imm:$I2))]> { [(set CC, (operator cls:$R1, imm:$I2))]> {
let isCompare = 1; let isCompare = 1;
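Format classes such as BinaryRI above pair a register input with an Immediate operand and a pattern of the form (set cls:$R1, (operator cls:$R1src, imm:$I2)). At selection time the table-driven matcher, roughly speaking, checks that the right-hand operand is a ConstantSDNode accepted by the operand's predicate and then re-emits it as a target constant on the machine node. A hand-written sketch of that folding step (simplified; MachineOpc and the isInt<16> check stand in for a concrete opcode and predicate):

  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/Support/MathExtras.h"
  using namespace llvm;

  // Sketch: fold a constant RHS into an immediate-form machine node, the way
  // a BinaryRI-style pattern does once its predicate accepts the constant.
  static SDNode *selectBinaryImm(SelectionDAG &DAG, unsigned MachineOpc,
                                 SDValue LHS, SDValue RHS, const SDLoc &DL,
                                 EVT VT) {
    auto *C = dyn_cast<ConstantSDNode>(RHS);
    if (!C || !isInt<16>(C->getSExtValue())) // stand-in for the td predicate
      return nullptr;
    SDValue Imm = DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i32);
    return DAG.getMachineNode(MachineOpc, DL, VT, LHS, Imm);
  }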


@ -60,7 +60,7 @@ let Predicates = [FeatureVector] in {
// Generate byte mask. // Generate byte mask.
def VZERO : InherentVRIa<"vzero", 0xE744, 0>; def VZERO : InherentVRIa<"vzero", 0xE744, 0>;
def VONE : InherentVRIa<"vone", 0xE744, 0xffff>; def VONE : InherentVRIa<"vone", 0xE744, 0xffff>;
def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16_timm>; def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>;
// Generate mask. // Generate mask.
def VGM : BinaryVRIbGeneric<"vgm", 0xE746>; def VGM : BinaryVRIbGeneric<"vgm", 0xE746>;
@ -71,10 +71,10 @@ let Predicates = [FeatureVector] in {
// Replicate immediate. // Replicate immediate.
def VREPI : UnaryVRIaGeneric<"vrepi", 0xE745, imm32sx16>; def VREPI : UnaryVRIaGeneric<"vrepi", 0xE745, imm32sx16>;
def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16_timm, 0>; def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>;
def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16_timm, 1>; def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>;
def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16_timm, 2>; def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>;
def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16_timm, 3>; def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>;
} }
// Load element immediate. // Load element immediate.
@ -116,7 +116,7 @@ let Predicates = [FeatureVector] in {
(ins bdxaddr12only:$XBD2, imm32zx4:$M3), (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
"lcbb\t$R1, $XBD2, $M3", "lcbb\t$R1, $XBD2, $M3",
[(set GR32:$R1, (int_s390_lcbb bdxaddr12only:$XBD2, [(set GR32:$R1, (int_s390_lcbb bdxaddr12only:$XBD2,
imm32zx4_timm:$M3))]>; imm32zx4:$M3))]>;
// Load with length. The number of loaded bytes is only known at run time. // Load with length. The number of loaded bytes is only known at run time.
def VLL : BinaryVRSb<"vll", 0xE737, int_s390_vll, 0>; def VLL : BinaryVRSb<"vll", 0xE737, int_s390_vll, 0>;
@ -362,9 +362,9 @@ let Predicates = [FeatureVector] in {
def VREPH : BinaryVRIc<"vreph", 0xE74D, z_splat, v128h, v128h, 1>; def VREPH : BinaryVRIc<"vreph", 0xE74D, z_splat, v128h, v128h, 1>;
def VREPF : BinaryVRIc<"vrepf", 0xE74D, z_splat, v128f, v128f, 2>; def VREPF : BinaryVRIc<"vrepf", 0xE74D, z_splat, v128f, v128f, 2>;
def VREPG : BinaryVRIc<"vrepg", 0xE74D, z_splat, v128g, v128g, 3>; def VREPG : BinaryVRIc<"vrepg", 0xE74D, z_splat, v128g, v128g, 3>;
def : Pat<(v4f32 (z_splat VR128:$vec, imm32zx16_timm:$index)), def : Pat<(v4f32 (z_splat VR128:$vec, imm32zx16:$index)),
(VREPF VR128:$vec, imm32zx16:$index)>; (VREPF VR128:$vec, imm32zx16:$index)>;
def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16_timm:$index)), def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16:$index)),
(VREPG VR128:$vec, imm32zx16:$index)>; (VREPG VR128:$vec, imm32zx16:$index)>;
// Select. // Select.
@ -778,7 +778,7 @@ let Predicates = [FeatureVector] in {
// Shift left double by byte. // Shift left double by byte.
def VSLDB : TernaryVRId<"vsldb", 0xE777, z_shl_double, v128b, v128b, 0>; def VSLDB : TernaryVRId<"vsldb", 0xE777, z_shl_double, v128b, v128b, 0>;
def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8_timm:$z), def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z),
(VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>; (VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>;
// Shift left double by bit. // Shift left double by bit.


@@ -21,32 +21,15 @@ class ImmediateTLSAsmOperand<string name>
  let RenderMethod = "addImmTLSOperands";
}
-class ImmediateOp<ValueType vt, string asmop> : Operand<vt> {
-  let PrintMethod = "print"##asmop##"Operand";
-  let DecoderMethod = "decode"##asmop##"Operand";
-  let ParserMatchClass = !cast<AsmOperandClass>(asmop);
-}
-class ImmOpWithPattern<ValueType vt, string asmop, code pred, SDNodeXForm xform,
-      SDNode ImmNode = imm> :
-  ImmediateOp<vt, asmop>, PatLeaf<(vt ImmNode), pred, xform>;
-// class ImmediatePatLeaf<ValueType vt, code pred,
-//       SDNodeXForm xform, SDNode ImmNode>
-//   : PatLeaf<(vt ImmNode), pred, xform>;
// Constructs both a DAG pattern and instruction operand for an immediate
// of type VT. PRED returns true if a node is acceptable and XFORM returns
// the operand value associated with the node. ASMOP is the name of the
// associated asm operand, and also forms the basis of the asm print method.
-multiclass Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop> {
-  // def "" : ImmediateOp<vt, asmop>,
-  //           PatLeaf<(vt imm), pred, xform>;
-  def "" : ImmOpWithPattern<vt, asmop, pred, xform>;
-  // def _timm : PatLeaf<(vt timm), pred, xform>;
-  def _timm : ImmOpWithPattern<vt, asmop, pred, xform, timm>;
+class Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop>
+  : PatLeaf<(vt imm), pred, xform>, Operand<vt> {
+  let PrintMethod = "print"##asmop##"Operand";
+  let DecoderMethod = "decode"##asmop##"Operand";
+  let ParserMatchClass = !cast<AsmOperandClass>(asmop);
}
// Constructs an asm operand for a PC-relative address. SIZE says how
@@ -312,87 +295,87 @@ def U48Imm : ImmediateAsmOperand<"U48Imm">;
// Immediates for the lower and upper 16 bits of an i32, with the other
// bits of the i32 being zero.
-defm imm32ll16 : Immediate<i32, [{
+def imm32ll16 : Immediate<i32, [{
  return SystemZ::isImmLL(N->getZExtValue());
}], LL16, "U16Imm">;
-defm imm32lh16 : Immediate<i32, [{
+def imm32lh16 : Immediate<i32, [{
  return SystemZ::isImmLH(N->getZExtValue());
}], LH16, "U16Imm">;
// Immediates for the lower and upper 16 bits of an i32, with the other
// bits of the i32 being one.
-defm imm32ll16c : Immediate<i32, [{
+def imm32ll16c : Immediate<i32, [{
  return SystemZ::isImmLL(uint32_t(~N->getZExtValue()));
}], LL16, "U16Imm">;
-defm imm32lh16c : Immediate<i32, [{
+def imm32lh16c : Immediate<i32, [{
  return SystemZ::isImmLH(uint32_t(~N->getZExtValue()));
}], LH16, "U16Imm">;
// Short immediates
-defm imm32zx1 : Immediate<i32, [{
+def imm32zx1 : Immediate<i32, [{
  return isUInt<1>(N->getZExtValue());
}], NOOP_SDNodeXForm, "U1Imm">;
-defm imm32zx2 : Immediate<i32, [{
+def imm32zx2 : Immediate<i32, [{
  return isUInt<2>(N->getZExtValue());
}], NOOP_SDNodeXForm, "U2Imm">;
-defm imm32zx3 : Immediate<i32, [{
+def imm32zx3 : Immediate<i32, [{
  return isUInt<3>(N->getZExtValue());
}], NOOP_SDNodeXForm, "U3Imm">;
-defm imm32zx4 : Immediate<i32, [{
+def imm32zx4 : Immediate<i32, [{
  return isUInt<4>(N->getZExtValue());
}], NOOP_SDNodeXForm, "U4Imm">;
// Note: this enforces an even value during code generation only.
// When used from the assembler, any 4-bit value is allowed.
-defm imm32zx4even : Immediate<i32, [{
+def imm32zx4even : Immediate<i32, [{
  return isUInt<4>(N->getZExtValue());
}], UIMM8EVEN, "U4Imm">;
-defm imm32zx6 : Immediate<i32, [{
+def imm32zx6 : Immediate<i32, [{
  return isUInt<6>(N->getZExtValue());
}], NOOP_SDNodeXForm, "U6Imm">;
-defm imm32sx8 : Immediate<i32, [{
+def imm32sx8 : Immediate<i32, [{
  return isInt<8>(N->getSExtValue());
}], SIMM8, "S8Imm">;
-defm imm32zx8 : Immediate<i32, [{
+def imm32zx8 : Immediate<i32, [{
  return isUInt<8>(N->getZExtValue());
}], UIMM8, "U8Imm">;
-defm imm32zx8trunc : Immediate<i32, [{}], UIMM8, "U8Imm">;
+def imm32zx8trunc : Immediate<i32, [{}], UIMM8, "U8Imm">;
-defm imm32zx12 : Immediate<i32, [{
+def imm32zx12 : Immediate<i32, [{
  return isUInt<12>(N->getZExtValue());
}], UIMM12, "U12Imm">;
-defm imm32sx16 : Immediate<i32, [{
+def imm32sx16 : Immediate<i32, [{
  return isInt<16>(N->getSExtValue());
}], SIMM16, "S16Imm">;
-defm imm32sx16n : Immediate<i32, [{
+def imm32sx16n : Immediate<i32, [{
  return isInt<16>(-N->getSExtValue());
}], NEGSIMM16, "S16Imm">;
-defm imm32zx16 : Immediate<i32, [{
+def imm32zx16 : Immediate<i32, [{
  return isUInt<16>(N->getZExtValue());
}], UIMM16, "U16Imm">;
-defm imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">;
+def imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">;
-defm imm32zx16trunc : Immediate<i32, [{}], UIMM16, "U16Imm">;
+def imm32zx16trunc : Immediate<i32, [{}], UIMM16, "U16Imm">;
// Full 32-bit immediates. we need both signed and unsigned versions
// because the assembler is picky. E.g. AFI requires signed operands
// while NILF requires unsigned ones.
-defm simm32 : Immediate<i32, [{}], SIMM32, "S32Imm">;
+def simm32 : Immediate<i32, [{}], SIMM32, "S32Imm">;
-defm uimm32 : Immediate<i32, [{}], UIMM32, "U32Imm">;
+def uimm32 : Immediate<i32, [{}], UIMM32, "U32Imm">;
-defm simm32n : Immediate<i32, [{
+def simm32n : Immediate<i32, [{
  return isInt<32>(-N->getSExtValue());
}], NEGSIMM32, "S32Imm">;
@@ -404,107 +387,107 @@ def imm32 : ImmLeaf<i32, [{}]>;
// Immediates for 16-bit chunks of an i64, with the other bits of the
// i32 being zero.
-defm imm64ll16 : Immediate<i64, [{
+def imm64ll16 : Immediate<i64, [{
  return SystemZ::isImmLL(N->getZExtValue());
}], LL16, "U16Imm">;
-defm imm64lh16 : Immediate<i64, [{
+def imm64lh16 : Immediate<i64, [{
  return SystemZ::isImmLH(N->getZExtValue());
}], LH16, "U16Imm">;
-defm imm64hl16 : Immediate<i64, [{
+def imm64hl16 : Immediate<i64, [{
  return SystemZ::isImmHL(N->getZExtValue());
}], HL16, "U16Imm">;
-defm imm64hh16 : Immediate<i64, [{
+def imm64hh16 : Immediate<i64, [{
  return SystemZ::isImmHH(N->getZExtValue());
}], HH16, "U16Imm">;
// Immediates for 16-bit chunks of an i64, with the other bits of the
// i32 being one.
-defm imm64ll16c : Immediate<i64, [{
+def imm64ll16c : Immediate<i64, [{
  return SystemZ::isImmLL(uint64_t(~N->getZExtValue()));
}], LL16, "U16Imm">;
-defm imm64lh16c : Immediate<i64, [{
+def imm64lh16c : Immediate<i64, [{
  return SystemZ::isImmLH(uint64_t(~N->getZExtValue()));
}], LH16, "U16Imm">;
-defm imm64hl16c : Immediate<i64, [{
+def imm64hl16c : Immediate<i64, [{
  return SystemZ::isImmHL(uint64_t(~N->getZExtValue()));
}], HL16, "U16Imm">;
-defm imm64hh16c : Immediate<i64, [{
+def imm64hh16c : Immediate<i64, [{
  return SystemZ::isImmHH(uint64_t(~N->getZExtValue()));
}], HH16, "U16Imm">;
// Immediates for the lower and upper 32 bits of an i64, with the other
// bits of the i32 being zero.
-defm imm64lf32 : Immediate<i64, [{
+def imm64lf32 : Immediate<i64, [{
  return SystemZ::isImmLF(N->getZExtValue());
}], LF32, "U32Imm">;
-defm imm64hf32 : Immediate<i64, [{
+def imm64hf32 : Immediate<i64, [{
  return SystemZ::isImmHF(N->getZExtValue());
}], HF32, "U32Imm">;
// Immediates for the lower and upper 32 bits of an i64, with the other
// bits of the i32 being one.
-defm imm64lf32c : Immediate<i64, [{
+def imm64lf32c : Immediate<i64, [{
  return SystemZ::isImmLF(uint64_t(~N->getZExtValue()));
}], LF32, "U32Imm">;
-defm imm64hf32c : Immediate<i64, [{
+def imm64hf32c : Immediate<i64, [{
  return SystemZ::isImmHF(uint64_t(~N->getZExtValue()));
}], HF32, "U32Imm">;
// Negated immediates that fit LF32 or LH16.
-defm imm64lh16n : Immediate<i64, [{
+def imm64lh16n : Immediate<i64, [{
  return SystemZ::isImmLH(uint64_t(-N->getZExtValue()));
}], NEGLH16, "U16Imm">;
-defm imm64lf32n : Immediate<i64, [{
+def imm64lf32n : Immediate<i64, [{
  return SystemZ::isImmLF(uint64_t(-N->getZExtValue()));
}], NEGLF32, "U32Imm">;
// Short immediates.
-defm imm64sx8 : Immediate<i64, [{
+def imm64sx8 : Immediate<i64, [{
  return isInt<8>(N->getSExtValue());
}], SIMM8, "S8Imm">;
-defm imm64zx8 : Immediate<i64, [{
+def imm64zx8 : Immediate<i64, [{
  return isUInt<8>(N->getSExtValue());
}], UIMM8, "U8Imm">;
-defm imm64sx16 : Immediate<i64, [{
+def imm64sx16 : Immediate<i64, [{
  return isInt<16>(N->getSExtValue());
}], SIMM16, "S16Imm">;
-defm imm64sx16n : Immediate<i64, [{
+def imm64sx16n : Immediate<i64, [{
  return isInt<16>(-N->getSExtValue());
}], NEGSIMM16, "S16Imm">;
-defm imm64zx16 : Immediate<i64, [{
+def imm64zx16 : Immediate<i64, [{
  return isUInt<16>(N->getZExtValue());
}], UIMM16, "U16Imm">;
-defm imm64sx32 : Immediate<i64, [{
+def imm64sx32 : Immediate<i64, [{
  return isInt<32>(N->getSExtValue());
}], SIMM32, "S32Imm">;
-defm imm64sx32n : Immediate<i64, [{
+def imm64sx32n : Immediate<i64, [{
  return isInt<32>(-N->getSExtValue());
}], NEGSIMM32, "S32Imm">;
-defm imm64zx32 : Immediate<i64, [{
+def imm64zx32 : Immediate<i64, [{
  return isUInt<32>(N->getZExtValue());
}], UIMM32, "U32Imm">;
-defm imm64zx32n : Immediate<i64, [{
+def imm64zx32n : Immediate<i64, [{
  return isUInt<32>(-N->getSExtValue());
}], NEGUIMM32, "U32Imm">;
-defm imm64zx48 : Immediate<i64, [{
+def imm64zx48 : Immediate<i64, [{
  return isUInt<64>(N->getZExtValue());
}], UIMM48, "U48Imm">;
@@ -654,7 +637,7 @@ def bdvaddr12only : BDVMode< "64", "12">;
//===----------------------------------------------------------------------===//
// A 4-bit condition-code mask.
-def cond4 : PatLeaf<(i32 timm), [{ return (N->getZExtValue() < 16); }]>,
+def cond4 : PatLeaf<(i32 imm), [{ return (N->getZExtValue() < 16); }]>,
            Operand<i32> {
  let PrintMethod = "printCond4Operand";
}
@@ -472,17 +472,17 @@ def z_subcarry : PatFrag<(ops node:$lhs, node:$rhs),
                         (z_subcarry_1 node:$lhs, node:$rhs, CC)>;
// Signed and unsigned comparisons.
-def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, timm), [{
+def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
  unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
  return Type != SystemZICMP::UnsignedOnly;
}]>;
-def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, timm), [{
+def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
  unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
  return Type != SystemZICMP::SignedOnly;
}]>;
// Register- and memory-based TEST UNDER MASK.
-def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, timm)>;
+def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, imm)>;
def z_tm_mem : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, 0)>;
// Register sign-extend operations. Sub-32-bit values are represented as i32s.
@@ -100,12 +100,12 @@ multiclass CondStores64<Instruction insn, Instruction insninv,
                        SDPatternOperator store, SDPatternOperator load,
                        AddressingMode mode> {
  def : Pat<(store (z_select_ccmask GR64:$new, (load mode:$addr),
-                                    imm32zx4_timm:$valid, imm32zx4_timm:$cc),
+                                    imm32zx4:$valid, imm32zx4:$cc),
                   mode:$addr),
            (insn (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr,
                  imm32zx4:$valid, imm32zx4:$cc)>;
  def : Pat<(store (z_select_ccmask (load mode:$addr), GR64:$new,
-                                    imm32zx4_timm:$valid, imm32zx4_timm:$cc),
+                                    imm32zx4:$valid, imm32zx4:$cc),
                   mode:$addr),
            (insninv (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr,
                     imm32zx4:$valid, imm32zx4:$cc)>;
@@ -209,10 +209,10 @@ std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemchr(
  // Now select between End and null, depending on whether the character
  // was found.
-  SDValue Ops[] = {
-      End, DAG.getConstant(0, DL, PtrVT),
-      DAG.getTargetConstant(SystemZ::CCMASK_SRST, DL, MVT::i32),
-      DAG.getTargetConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32), CCReg};
+  SDValue Ops[] = {End, DAG.getConstant(0, DL, PtrVT),
+                   DAG.getConstant(SystemZ::CCMASK_SRST, DL, MVT::i32),
+                   DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32),
+                   CCReg};
  End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, PtrVT, Ops);
  return std::make_pair(End, Chain);
}
@@ -39,7 +39,7 @@ defm MEMORY_INIT :
         (ins i32imm_op:$seg, i32imm_op:$idx, I32:$dest,
              I32:$offset, I32:$size),
         (outs), (ins i32imm_op:$seg, i32imm_op:$idx),
-         [(int_wasm_memory_init (i32 timm:$seg), (i32 timm:$idx), I32:$dest,
+         [(int_wasm_memory_init (i32 imm:$seg), (i32 imm:$idx), I32:$dest,
                                 I32:$offset, I32:$size
          )],
         "memory.init\t$seg, $idx, $dest, $offset, $size",
@@ -48,7 +48,7 @@ defm MEMORY_INIT :
let hasSideEffects = 1 in
defm DATA_DROP :
  BULK_I<(outs), (ins i32imm_op:$seg), (outs), (ins i32imm_op:$seg),
-         [(int_wasm_data_drop (i32 timm:$seg))],
+         [(int_wasm_data_drop (i32 imm:$seg))],
         "data.drop\t$seg", "data.drop\t$seg", 0x09>;
let mayLoad = 1, mayStore = 1 in
@@ -879,9 +879,10 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
      case ISD::FRINT: Imm = 0x4; break;
      }
      SDLoc dl(N);
-      SDValue Res = CurDAG->getNode(
-          X86ISD::VRNDSCALE, dl, N->getValueType(0), N->getOperand(0),
-          CurDAG->getTargetConstant(Imm, dl, MVT::i8));
+      SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
+                                    N->getValueType(0),
+                                    N->getOperand(0),
+                                    CurDAG->getConstant(Imm, dl, MVT::i8));
      --I;
      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
      ++I;
@@ -5095,9 +5096,10 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
    case ISD::FRINT: Imm = 0x4; break;
    }
    SDLoc dl(Node);
-    SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, Node->getValueType(0),
-                                  Node->getOperand(0),
-                                  CurDAG->getTargetConstant(Imm, dl, MVT::i8));
+    SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
+                                  Node->getValueType(0),
+                                  Node->getOperand(0),
+                                  CurDAG->getConstant(Imm, dl, MVT::i8));
    ReplaceNode(Node, Res.getNode());
    SelectCode(Res.getNode());
    return;
@@ -211,7 +211,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
  // Integer absolute.
  if (Subtarget.hasCMov()) {
    setOperationAction(ISD::ABS , MVT::i16 , Custom);
    setOperationAction(ISD::ABS , MVT::i32 , Custom);
  }
  setOperationAction(ISD::ABS , MVT::i64 , Custom);
@@ -4981,7 +4981,7 @@ bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
  // Find the type this will be legalized too. Otherwise we might prematurely
  // convert this to shl+add/sub and then still have to type legalize those ops.
  // Another choice would be to defer the decision for illegal types until
  // after type legalization. But constant splat vectors of i64 can't make it
  // through type legalization on 32-bit targets so we would need to special
  // case vXi64.
@@ -5759,7 +5759,7 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
    if (IdxVal == 0) {
      // Zero lower bits of the Vec
-      SDValue ShiftBits = DAG.getTargetConstant(SubVecNumElems, dl, MVT::i8);
+      SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec,
                        ZeroIdx);
      Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
@@ -5778,7 +5778,7 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
    if (Vec.isUndef()) {
      assert(IdxVal != 0 && "Unexpected index");
      SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
-                           DAG.getTargetConstant(IdxVal, dl, MVT::i8));
+                           DAG.getConstant(IdxVal, dl, MVT::i8));
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
    }
@@ -5788,17 +5788,17 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
      unsigned ShiftLeft = NumElems - SubVecNumElems;
      unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
      SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
-                           DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
+                           DAG.getConstant(ShiftLeft, dl, MVT::i8));
      if (ShiftRight != 0)
        SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
-                             DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
+                             DAG.getConstant(ShiftRight, dl, MVT::i8));
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
    }
    // Simple case when we put subvector in the upper part
    if (IdxVal + SubVecNumElems == NumElems) {
      SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
-                           DAG.getTargetConstant(IdxVal, dl, MVT::i8));
+                           DAG.getConstant(IdxVal, dl, MVT::i8));
      if (SubVecNumElems * 2 == NumElems) {
        // Special case, use legal zero extending insert_subvector. This allows
        // isel to opimitize when bits are known zero.
@@ -5811,7 +5811,7 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
        Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
                          Undef, Vec, ZeroIdx);
        NumElems = WideOpVT.getVectorNumElements();
-        SDValue ShiftBits = DAG.getTargetConstant(NumElems - IdxVal, dl, MVT::i8);
+        SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8);
        Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
        Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
      }
@@ -5827,17 +5827,17 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
  Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
  // Move the current value of the bit to be replace to the lsbs.
  Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
-                   DAG.getTargetConstant(IdxVal, dl, MVT::i8));
+                   DAG.getConstant(IdxVal, dl, MVT::i8));
  // Xor with the new bit.
  Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Op, SubVec);
  // Shift to MSB, filling bottom bits with 0.
  unsigned ShiftLeft = NumElems - SubVecNumElems;
  Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Op,
-                   DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
+                   DAG.getConstant(ShiftLeft, dl, MVT::i8));
  // Shift to the final position, filling upper bits with 0.
  unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
  Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op,
-                   DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
+                   DAG.getConstant(ShiftRight, dl, MVT::i8));
  // Xor with original vector leaving the new value.
  Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Vec, Op);
  // Reduce to original width if needed.
@@ -7637,7 +7637,7 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
  assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
  SDLoc DL(Op);
  SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
-                               DAG.getIntPtrConstant(InsertPSMask, DL, true));
+                               DAG.getIntPtrConstant(InsertPSMask, DL));
  return DAG.getBitcast(VT, Result);
}
@@ -7650,7 +7650,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits,
  unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
  SrcOp = DAG.getBitcast(ShVT, SrcOp);
  assert(NumBits % 8 == 0 && "Only support byte sized shifts");
-  SDValue ShiftVal = DAG.getTargetConstant(NumBits / 8, dl, MVT::i8);
+  SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, MVT::i8);
  return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
}
@@ -9439,9 +9439,9 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
      SDValue HiHi = DAG.getVectorShuffle(MVT::v8f32, DL, SrcVec, SrcVec,
                                          {4, 5, 6, 7, 4, 5, 6, 7});
      if (Subtarget.hasXOP())
-        return DAG.getBitcast(
-            VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32, LoLo, HiHi,
-                            IndicesVec, DAG.getTargetConstant(0, DL, MVT::i8)));
+        return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32,
+                                              LoLo, HiHi, IndicesVec,
+                                              DAG.getConstant(0, DL, MVT::i8)));
      // Permute Lo and Hi and then select based on index range.
      // This works as VPERMILPS only uses index bits[0:1] to permute elements.
      SDValue Res = DAG.getSelectCC(
@@ -9475,9 +9475,9 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
      // VPERMIL2PD selects with bit#1 of the index vector, so scale IndicesVec.
      IndicesVec = DAG.getNode(ISD::ADD, DL, IndicesVT, IndicesVec, IndicesVec);
      if (Subtarget.hasXOP())
-        return DAG.getBitcast(
-            VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64, LoLo, HiHi,
-                            IndicesVec, DAG.getTargetConstant(0, DL, MVT::i8)));
+        return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64,
+                                              LoLo, HiHi, IndicesVec,
+                                              DAG.getConstant(0, DL, MVT::i8)));
      // Permute Lo and Hi and then select based on index range.
      // This works as VPERMILPD only uses index bit[1] to permute elements.
      SDValue Res = DAG.getSelectCC(
@@ -10048,7 +10048,7 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
                         DAG.getUNDEF(ShiftVT), SubVec,
                         DAG.getIntPtrConstant(0, dl));
    Op = DAG.getNode(X86ISD::KSHIFTL, dl, ShiftVT, SubVec,
-                     DAG.getTargetConstant(Idx * SubVecNumElts, dl, MVT::i8));
+                     DAG.getConstant(Idx * SubVecNumElts, dl, MVT::i8));
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, Op,
                       DAG.getIntPtrConstant(0, dl));
  }
@@ -10441,7 +10441,7 @@ static unsigned getV4X86ShuffleImm(ArrayRef<int> Mask) {
static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL,
                                          SelectionDAG &DAG) {
-  return DAG.getTargetConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8);
+  return DAG.getConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8);
}
/// Compute whether each element of a shuffle is zeroable.
@@ -11086,7 +11086,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
  case MVT::v8i16:
    assert(Subtarget.hasSSE41() && "128-bit blends require SSE41!");
    return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
-                       DAG.getTargetConstant(BlendMask, DL, MVT::i8));
+                       DAG.getConstant(BlendMask, DL, MVT::i8));
  case MVT::v16i16: {
    assert(Subtarget.hasAVX2() && "v16i16 blends require AVX2!");
    SmallVector<int, 8> RepeatedMask;
@@ -11098,7 +11098,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
        if (RepeatedMask[i] >= 8)
          BlendMask |= 1ull << i;
      return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
-                         DAG.getTargetConstant(BlendMask, DL, MVT::i8));
+                         DAG.getConstant(BlendMask, DL, MVT::i8));
    }
    // Use PBLENDW for lower/upper lanes and then blend lanes.
    // TODO - we should allow 2 PBLENDW here and leave shuffle combine to
@@ -11107,9 +11107,9 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
    uint64_t HiMask = (BlendMask >> 8) & 0xFF;
    if (LoMask == 0 || LoMask == 255 || HiMask == 0 || HiMask == 255) {
      SDValue Lo = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
-                               DAG.getTargetConstant(LoMask, DL, MVT::i8));
+                               DAG.getConstant(LoMask, DL, MVT::i8));
      SDValue Hi = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
-                               DAG.getTargetConstant(HiMask, DL, MVT::i8));
+                               DAG.getConstant(HiMask, DL, MVT::i8));
      return DAG.getVectorShuffle(
          MVT::v16i16, DL, Lo, Hi,
          {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31});
@@ -11369,7 +11369,7 @@ static SDValue lowerShuffleAsByteRotateAndPermute(
    SDValue Rotate = DAG.getBitcast(
        VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, DAG.getBitcast(ByteVT, Hi),
                        DAG.getBitcast(ByteVT, Lo),
-                        DAG.getTargetConstant(Scale * RotAmt, DL, MVT::i8)));
+                        DAG.getConstant(Scale * RotAmt, DL, MVT::i8)));
    SmallVector<int, 64> PermMask(NumElts, SM_SentinelUndef);
    for (int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) {
      for (int Elt = 0; Elt != NumEltsPerLane; ++Elt) {
@@ -11576,7 +11576,7 @@ static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1,
           "512-bit PALIGNR requires BWI instructions");
    return DAG.getBitcast(
        VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, Lo, Hi,
-                        DAG.getTargetConstant(ByteRotation, DL, MVT::i8)));
+                        DAG.getConstant(ByteRotation, DL, MVT::i8)));
  }
  assert(VT.is128BitVector() &&
@@ -11590,12 +11590,10 @@ static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1,
  int LoByteShift = 16 - ByteRotation;
  int HiByteShift = ByteRotation;
-  SDValue LoShift =
-      DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Lo,
-                  DAG.getTargetConstant(LoByteShift, DL, MVT::i8));
-  SDValue HiShift =
-      DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Hi,
-                  DAG.getTargetConstant(HiByteShift, DL, MVT::i8));
+  SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Lo,
+                                DAG.getConstant(LoByteShift, DL, MVT::i8));
+  SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Hi,
+                                DAG.getConstant(HiByteShift, DL, MVT::i8));
  return DAG.getBitcast(VT,
                        DAG.getNode(ISD::OR, DL, MVT::v16i8, LoShift, HiShift));
}
@@ -11627,7 +11625,7 @@ static SDValue lowerShuffleAsRotate(const SDLoc &DL, MVT VT, SDValue V1,
    return SDValue();
  return DAG.getNode(X86ISD::VALIGN, DL, VT, Lo, Hi,
-                     DAG.getTargetConstant(Rotation, DL, MVT::i8));
+                     DAG.getConstant(Rotation, DL, MVT::i8));
}
/// Try to lower a vector shuffle as a byte shift sequence.
@@ -11666,27 +11664,27 @@ static SDValue lowerVectorShuffleAsByteShiftMask(
  if (ZeroLo == 0) {
    unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
    Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
-                      DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
+                      DAG.getConstant(Scale * Shift, DL, MVT::i8));
    Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
-                      DAG.getTargetConstant(Scale * ZeroHi, DL, MVT::i8));
+                      DAG.getConstant(Scale * ZeroHi, DL, MVT::i8));
  } else if (ZeroHi == 0) {
    unsigned Shift = Mask[ZeroLo] % NumElts;
    Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
-                      DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
+                      DAG.getConstant(Scale * Shift, DL, MVT::i8));
    Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
-                      DAG.getTargetConstant(Scale * ZeroLo, DL, MVT::i8));
+                      DAG.getConstant(Scale * ZeroLo, DL, MVT::i8));
  } else if (!Subtarget.hasSSSE3()) {
    // If we don't have PSHUFB then its worth avoiding an AND constant mask
    // by performing 3 byte shifts. Shuffle combining can kick in above that.
    // TODO: There may be some cases where VSH{LR}DQ+PAND is still better.
    unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
    Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
-                      DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
+                      DAG.getConstant(Scale * Shift, DL, MVT::i8));
    Shift += Mask[ZeroLo] % NumElts;
    Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
-                      DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
+                      DAG.getConstant(Scale * Shift, DL, MVT::i8));
    Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
-                      DAG.getTargetConstant(Scale * ZeroLo, DL, MVT::i8));
+                      DAG.getConstant(Scale * ZeroLo, DL, MVT::i8));
  } else
    return SDValue();
@@ -11808,7 +11806,7 @@ static SDValue lowerShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
         "Illegal integer vector type");
  V = DAG.getBitcast(ShiftVT, V);
  V = DAG.getNode(Opcode, DL, ShiftVT, V,
-                  DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
+                  DAG.getConstant(ShiftAmt, DL, MVT::i8));
  return DAG.getBitcast(VT, V);
}
@@ -11942,14 +11940,14 @@ static SDValue lowerShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
  uint64_t BitLen, BitIdx;
  if (matchShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable))
    return DAG.getNode(X86ISD::EXTRQI, DL, VT, V1,
-                       DAG.getTargetConstant(BitLen, DL, MVT::i8),
-                       DAG.getTargetConstant(BitIdx, DL, MVT::i8));
+                       DAG.getConstant(BitLen, DL, MVT::i8),
+                       DAG.getConstant(BitIdx, DL, MVT::i8));
  if (matchShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx))
    return DAG.getNode(X86ISD::INSERTQI, DL, VT, V1 ? V1 : DAG.getUNDEF(VT),
                       V2 ? V2 : DAG.getUNDEF(VT),
-                       DAG.getTargetConstant(BitLen, DL, MVT::i8),
-                       DAG.getTargetConstant(BitIdx, DL, MVT::i8));
+                       DAG.getConstant(BitLen, DL, MVT::i8),
+                       DAG.getConstant(BitIdx, DL, MVT::i8));
  return SDValue();
}
@@ -12046,8 +12044,8 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
    int LoIdx = Offset * EltBits;
    SDValue Lo = DAG.getBitcast(
        MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
-                                DAG.getTargetConstant(EltBits, DL, MVT::i8),
-                                DAG.getTargetConstant(LoIdx, DL, MVT::i8)));
+                                DAG.getConstant(EltBits, DL, MVT::i8),
+                                DAG.getConstant(LoIdx, DL, MVT::i8)));
    if (isUndefUpperHalf(Mask) || !SafeOffset(Offset + 1))
      return DAG.getBitcast(VT, Lo);
@@ -12055,8 +12053,8 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
    int HiIdx = (Offset + 1) * EltBits;
    SDValue Hi = DAG.getBitcast(
        MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
-                                DAG.getTargetConstant(EltBits, DL, MVT::i8),
-                                DAG.getTargetConstant(HiIdx, DL, MVT::i8)));
+                                DAG.getConstant(EltBits, DL, MVT::i8),
+                                DAG.getConstant(HiIdx, DL, MVT::i8)));
    return DAG.getBitcast(VT,
                          DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi));
  }
@@ -12366,9 +12364,9 @@ static SDValue lowerShuffleAsElementInsertion(
      V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle);
    } else {
      V2 = DAG.getBitcast(MVT::v16i8, V2);
-      V2 = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, V2,
-                       DAG.getTargetConstant(
-                           V2Index * EltVT.getSizeInBits() / 8, DL, MVT::i8));
+      V2 = DAG.getNode(
+          X86ISD::VSHLDQ, DL, MVT::v16i8, V2,
+          DAG.getConstant(V2Index * EltVT.getSizeInBits() / 8, DL, MVT::i8));
      V2 = DAG.getBitcast(VT, V2);
    }
  }
@@ -12800,7 +12798,7 @@ static SDValue lowerShuffleAsInsertPS(const SDLoc &DL, SDValue V1, SDValue V2,
  // Insert the V2 element into the desired position.
  return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
-                     DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
+                     DAG.getConstant(InsertPSMask, DL, MVT::i8));
}
/// Try to lower a shuffle as a permute of the inputs followed by an
@@ -12949,14 +12947,14 @@ static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
      // If we have AVX, we can use VPERMILPS which will allow folding a load
      // into the shuffle.
      return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v2f64, V1,
-                         DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8));
+                         DAG.getConstant(SHUFPDMask, DL, MVT::i8));
    }
    return DAG.getNode(
        X86ISD::SHUFP, DL, MVT::v2f64,
        Mask[0] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1,
        Mask[1] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1,
-        DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8));
+        DAG.getConstant(SHUFPDMask, DL, MVT::i8));
  }
  assert(Mask[0] >= 0 && "No undef lanes in multi-input v2 shuffles!");
  assert(Mask[1] >= 0 && "No undef lanes in multi-input v2 shuffles!");
@@ -13002,7 +13000,7 @@ static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
  unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
  return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V2,
-                     DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8));
+                     DAG.getConstant(SHUFPDMask, DL, MVT::i8));
}
/// Handle lowering of 2-lane 64-bit integer shuffles.
@@ -14882,8 +14880,8 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
    if (WidenedMask[0] < 2 && WidenedMask[1] >= 2) {
      unsigned PermMask = ((WidenedMask[0] % 2) << 0) |
                          ((WidenedMask[1] % 2) << 1);
      return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
-                         DAG.getTargetConstant(PermMask, DL, MVT::i8));
+                         DAG.getConstant(PermMask, DL, MVT::i8));
    }
  }
}
@@ -14915,7 +14913,7 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
    V2 = DAG.getUNDEF(VT);
  return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2,
-                     DAG.getTargetConstant(PermMask, DL, MVT::i8));
+                     DAG.getConstant(PermMask, DL, MVT::i8));
}
/// Lower a vector shuffle by first fixing the 128-bit lanes and then
@@ -15544,7 +15542,7 @@ static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT, SDValue V1,
    V2 = getZeroVector(VT, Subtarget, DAG, DL);
  return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
-                     DAG.getTargetConstant(Immediate, DL, MVT::i8));
+                     DAG.getConstant(Immediate, DL, MVT::i8));
}
/// Handle lowering of 4-lane 64-bit floating point shuffles.
@@ -15579,7 +15577,7 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
      unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) |
                              ((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3);
      return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f64, V1,
-                         DAG.getTargetConstant(VPERMILPMask, DL, MVT::i8));
+                         DAG.getConstant(VPERMILPMask, DL, MVT::i8));
    }
    // With AVX2 we have direct support for this permutation.
@@ -16318,7 +16316,7 @@ static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
  }
  return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
-                     DAG.getTargetConstant(PermMask, DL, MVT::i8));
+                     DAG.getConstant(PermMask, DL, MVT::i8));
}
/// Handle lowering of 8-lane 64-bit floating point shuffles.
@@ -16343,7 +16341,7 @@ static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                             ((Mask[4] == 5) << 4) | ((Mask[5] == 5) << 5) |
                             ((Mask[6] == 7) << 6) | ((Mask[7] == 7) << 7);
    return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f64, V1,
-                       DAG.getTargetConstant(VPERMILPMask, DL, MVT::i8));
+                       DAG.getConstant(VPERMILPMask, DL, MVT::i8));
  }
  SmallVector<int, 4> RepeatedMask;
@@ -16772,7 +16770,7 @@ static SDValue lower1BitShuffleAsKSHIFTR(const SDLoc &DL, ArrayRef<int> Mask,
                    DAG.getUNDEF(WideVT), V1,
                    DAG.getIntPtrConstant(0, DL));
  Res = DAG.getNode(X86ISD::KSHIFTR, DL, WideVT, Res,
-                    DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
+                    DAG.getConstant(ShiftAmt, DL, MVT::i8));
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
                     DAG.getIntPtrConstant(0, DL));
}
@@ -16879,13 +16877,13 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
    int WideElts = WideVT.getVectorNumElements();
    // Shift left to put the original vector in the MSBs of the new size.
    Res = DAG.getNode(X86ISD::KSHIFTL, DL, WideVT, Res,
-                      DAG.getTargetConstant(WideElts - NumElts, DL, MVT::i8));
+                      DAG.getConstant(WideElts - NumElts, DL, MVT::i8));
    // Increase the shift amount to account for the left shift.
    ShiftAmt += WideElts - NumElts;
  }
  Res = DAG.getNode(Opcode, DL, WideVT, Res,
-                    DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
+                    DAG.getConstant(ShiftAmt, DL, MVT::i8));
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
                     DAG.getIntPtrConstant(0, DL));
}
@@ -17333,7 +17331,7 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
  // Use kshiftr instruction to move to the lower element.
  Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec,
-                    DAG.getTargetConstant(IdxVal, dl, MVT::i8));
+                    DAG.getConstant(IdxVal, dl, MVT::i8));
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
                     DAG.getIntPtrConstant(0, dl));
@@ -17561,7 +17559,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
        (Subtarget.hasAVX2() && EltVT == MVT::i32)) {
      SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
      return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec,
-                         DAG.getTargetConstant(1, dl, MVT::i8));
+                         DAG.getConstant(1, dl, MVT::i8));
    }
  }
@@ -17637,7 +17635,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
    // Create this as a scalar to vector..
    N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
    return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1,
-                       DAG.getTargetConstant(IdxVal << 4, dl, MVT::i8));
+                       DAG.getConstant(IdxVal << 4, dl, MVT::i8));
  }
  // PINSR* works with constant index.
@@ -17723,7 +17721,7 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
    // Shift to the LSB.
    Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec,
-                      DAG.getTargetConstant(IdxVal, dl, MVT::i8));
+                      DAG.getConstant(IdxVal, dl, MVT::i8));
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, Op.getValueType(), Vec,
                       DAG.getIntPtrConstant(0, dl));
@@ -18266,8 +18264,8 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
  APInt APIntShiftAmt;
  if (X86::isConstantSplat(Amt, APIntShiftAmt)) {
    uint64_t ShiftAmt = APIntShiftAmt.urem(VT.getScalarSizeInBits());
-    return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, Op0,
-                       Op1, DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
+    return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
+                       Op0, Op1, DAG.getConstant(ShiftAmt, DL, MVT::i8));
  }
  return DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT,
@@ -18699,7 +18697,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
    // Low will be bitcasted right away, so do not bother bitcasting back to its
    // original type.
    Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast,
-                      VecCstLowBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8));
+                      VecCstLowBitcast, DAG.getConstant(0xaa, DL, MVT::i8));
    // uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16),
    //                             (uint4) 0x53000000, 0xaa);
    SDValue VecCstHighBitcast = DAG.getBitcast(VecI16VT, VecCstHigh);
@@ -18707,7 +18705,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
    // High will be bitcasted right away, so do not bother bitcasting back to
    // its original type.
    High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast,
-                       VecCstHighBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8));
+                       VecCstHighBitcast, DAG.getConstant(0xaa, DL, MVT::i8));
  } else {
    SDValue VecCstMask = DAG.getConstant(0xffff, DL, VecIntVT);
    // uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000;
@@ -20657,14 +20655,14 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
    }
    SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1,
-                               DAG.getTargetConstant(CC0, dl, MVT::i8));
+                               DAG.getConstant(CC0, dl, MVT::i8));
    SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1,
-                               DAG.getTargetConstant(CC1, dl, MVT::i8));
+                               DAG.getConstant(CC1, dl, MVT::i8));
    Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
  } else {
    // Handle all other FP comparisons here.
    Cmp = DAG.getNode(Opc, dl, VT, Op0, Op1,
-                      DAG.getTargetConstant(SSECC, dl, MVT::i8));
+                      DAG.getConstant(SSECC, dl, MVT::i8));
  }
  // If this is SSE/AVX CMPP, bitcast the result back to integer to match the
@@ -20727,7 +20725,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
        ISD::isUnsignedIntSetCC(Cond) ? X86ISD::VPCOMU : X86ISD::VPCOM;
    return DAG.getNode(Opc, dl, VT, Op0, Op1,
-                       DAG.getTargetConstant(CmpMode, dl, MVT::i8));
+                       DAG.getConstant(CmpMode, dl, MVT::i8));
  }
  // (X & Y) != 0 --> (X & Y) == Y iff Y is power-of-2.
@@ -21197,16 +21195,15 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
        cast<CondCodeSDNode>(Cond.getOperand(2))->get(), CondOp0, CondOp1);
    if (Subtarget.hasAVX512()) {
-      SDValue Cmp =
-          DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0, CondOp1,
-                      DAG.getTargetConstant(SSECC, DL, MVT::i8));
+      SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0,
+                                CondOp1, DAG.getConstant(SSECC, DL, MVT::i8));
      assert(!VT.isVector() && "Not a scalar type?");
      return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2);
    }
    if (SSECC < 8 || Subtarget.hasAVX()) {
      SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1,
-                                DAG.getTargetConstant(SSECC, DL, MVT::i8));
+                                DAG.getConstant(SSECC, DL, MVT::i8));
      // If we have AVX, we can use a variable vector select (VBLENDV) instead
      // of 3 logic instructions for size savings and potentially speed.
@@ -21664,7 +21661,7 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
    unsigned SignExtShift = DestWidth - InSVT.getSizeInBits();
    SignExt = DAG.getNode(X86ISD::VSRAI, dl, DestVT, Curr,
-                          DAG.getTargetConstant(SignExtShift, dl, MVT::i8));
+                          DAG.getConstant(SignExtShift, dl, MVT::i8));
  }
  if (VT == MVT::v2i64) {
@@ -22659,7 +22656,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
  }
  return DAG.getNode(Opc, dl, VT, SrcOp,
-                     DAG.getTargetConstant(ShiftAmt, dl, MVT::i8));
+                     DAG.getConstant(ShiftAmt, dl, MVT::i8));
}
/// Handle vector element shifts where the shift amount may or may not be a
@@ -22704,7 +22701,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
    ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
                        MVT::v2i64, ShAmt);
  else {
-    SDValue ByteShift = DAG.getTargetConstant(
+    SDValue ByteShift = DAG.getConstant(
        (128 - AmtTy.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8);
    ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt);
    ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
@@ -23002,6 +22999,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
    SDValue Src2 = Op.getOperand(2);
    SDValue Src3 = Op.getOperand(3);
+    if (IntrData->Type == INTR_TYPE_3OP_IMM8)
+      Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3);
    // We specify 2 possible opcodes for intrinsics with rounding modes.
    // First, we check if the intrinsic may have non-default rounding mode,
    // (IntrData->Opc1 != 0), then we check the rounding mode operand.
@@ -23254,6 +23254,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
  case CMP_MASK_CC: {
    MVT MaskVT = Op.getSimpleValueType();
    SDValue CC = Op.getOperand(3);
+    CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CC);
    // We specify 2 possible opcodes for intrinsics with rounding modes.
    // First, we check if the intrinsic may have non-default rounding mode,
    // (IntrData->Opc1 != 0), then we check the rounding mode operand.
@@ -23272,7 +23273,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
  case CMP_MASK_SCALAR_CC: {
    SDValue Src1 = Op.getOperand(1);
    SDValue Src2 = Op.getOperand(2);
-    SDValue CC = Op.getOperand(3);
+    SDValue CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(3));
    SDValue Mask = Op.getOperand(4);
    SDValue Cmp;
@@ -23343,10 +23344,10 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
      SDValue FCmp;
      if (isRoundModeCurDirection(Sae))
        FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS,
-                           DAG.getTargetConstant(CondVal, dl, MVT::i8));
+                           DAG.getConstant(CondVal, dl, MVT::i8));
      else if (isRoundModeSAE(Sae))
        FCmp = DAG.getNode(X86ISD::FSETCCM_SAE, dl, MVT::v1i1, LHS, RHS,
-                           DAG.getTargetConstant(CondVal, dl, MVT::i8), Sae);
+                           DAG.getConstant(CondVal, dl, MVT::i8), Sae);
      else
        return SDValue();
      // Need to fill with zeros to ensure the bitcast will produce zeroes
@@ -23406,9 +23407,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
      assert(IntrData->Opc0 == X86ISD::VRNDSCALE && "Unexpected opcode");
      // Clear the upper bits of the rounding immediate so that the legacy
      // intrinsic can't trigger the scaling behavior of VRNDSCALE.
-      auto Round = cast<ConstantSDNode>(Op.getOperand(2));
+      SDValue RoundingMode = DAG.getNode(ISD::AND, dl, MVT::i32,
SDValue RoundingMode = Op.getOperand(2),
DAG.getTargetConstant(Round->getZExtValue() & 0xf, dl, MVT::i32); DAG.getConstant(0xf, dl, MVT::i32));
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
Op.getOperand(1), RoundingMode); Op.getOperand(1), RoundingMode);
} }
@ -23416,9 +23417,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
assert(IntrData->Opc0 == X86ISD::VRNDSCALES && "Unexpected opcode"); assert(IntrData->Opc0 == X86ISD::VRNDSCALES && "Unexpected opcode");
// Clear the upper bits of the rounding immediate so that the legacy // Clear the upper bits of the rounding immediate so that the legacy
// intrinsic can't trigger the scaling behavior of VRNDSCALE. // intrinsic can't trigger the scaling behavior of VRNDSCALE.
auto Round = cast<ConstantSDNode>(Op.getOperand(3)); SDValue RoundingMode = DAG.getNode(ISD::AND, dl, MVT::i32,
SDValue RoundingMode = Op.getOperand(3),
DAG.getTargetConstant(Round->getZExtValue() & 0xf, dl, MVT::i32); DAG.getConstant(0xf, dl, MVT::i32));
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), RoundingMode); Op.getOperand(1), Op.getOperand(2), RoundingMode);
} }
@ -26095,7 +26096,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
(VT == MVT::v32i8 && Subtarget.hasInt256())) && (VT == MVT::v32i8 && Subtarget.hasInt256())) &&
!Subtarget.hasXOP()) { !Subtarget.hasXOP()) {
int NumElts = VT.getVectorNumElements(); int NumElts = VT.getVectorNumElements();
SDValue Cst8 = DAG.getTargetConstant(8, dl, MVT::i8); SDValue Cst8 = DAG.getConstant(8, dl, MVT::i8);
// Extend constant shift amount to vXi16 (it doesn't matter if the type // Extend constant shift amount to vXi16 (it doesn't matter if the type
// isn't legal). // isn't legal).
@ -26367,7 +26368,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
unsigned Op = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI); unsigned Op = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI);
uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits); uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits);
return DAG.getNode(Op, DL, VT, R, return DAG.getNode(Op, DL, VT, R,
DAG.getTargetConstant(RotateAmt, DL, MVT::i8)); DAG.getConstant(RotateAmt, DL, MVT::i8));
} }
// Else, fall-back on VPROLV/VPRORV. // Else, fall-back on VPROLV/VPRORV.
@ -26388,7 +26389,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
if (0 <= CstSplatIndex) { if (0 <= CstSplatIndex) {
uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits); uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits);
return DAG.getNode(X86ISD::VROTLI, DL, VT, R, return DAG.getNode(X86ISD::VROTLI, DL, VT, R,
DAG.getTargetConstant(RotateAmt, DL, MVT::i8)); DAG.getConstant(RotateAmt, DL, MVT::i8));
} }
// Use general rotate by variable (per-element). // Use general rotate by variable (per-element).
@ -26625,7 +26626,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
// If this is a canonical idempotent atomicrmw w/no uses, we have a better // If this is a canonical idempotent atomicrmw w/no uses, we have a better
// lowering available in lowerAtomicArith. // lowering available in lowerAtomicArith.
// TODO: push more cases through this path. // TODO: push more cases through this path.
if (auto *C = dyn_cast<ConstantInt>(AI->getValOperand())) if (auto *C = dyn_cast<ConstantInt>(AI->getValOperand()))
if (AI->getOperation() == AtomicRMWInst::Or && C->isZero() && if (AI->getOperation() == AtomicRMWInst::Or && C->isZero() &&
AI->use_empty()) AI->use_empty())
@ -26695,7 +26696,7 @@ bool X86TargetLowering::lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const {
/// Emit a locked operation on a stack location which does not change any /// Emit a locked operation on a stack location which does not change any
/// memory location, but does involve a lock prefix. Location is chosen to be /// memory location, but does involve a lock prefix. Location is chosen to be
/// a) very likely accessed only by a single thread to minimize cache traffic, /// a) very likely accessed only by a single thread to minimize cache traffic,
/// and b) definitely dereferenceable. Returns the new Chain result. /// and b) definitely dereferenceable. Returns the new Chain result.
static SDValue emitLockedStackOp(SelectionDAG &DAG, static SDValue emitLockedStackOp(SelectionDAG &DAG,
const X86Subtarget &Subtarget, const X86Subtarget &Subtarget,
SDValue Chain, SDLoc DL) { SDValue Chain, SDLoc DL) {
@ -26704,22 +26705,22 @@ static SDValue emitLockedStackOp(SelectionDAG &DAG,
// operations issued by the current processor. As such, the location // operations issued by the current processor. As such, the location
// referenced is not relevant for the ordering properties of the instruction. // referenced is not relevant for the ordering properties of the instruction.
// See: Intel® 64 and IA-32 Architectures Software Developer's Manual, // See: Intel® 64 and IA-32 Architectures Software Developer's Manual,
// 8.2.3.9 Loads and Stores Are Not Reordered with Locked Instructions // 8.2.3.9 Loads and Stores Are Not Reordered with Locked Instructions
// 2) Using an immediate operand appears to be the best encoding choice // 2) Using an immediate operand appears to be the best encoding choice
// here since it doesn't require an extra register. // here since it doesn't require an extra register.
// 3) OR appears to be very slightly faster than ADD. (Though, the difference // 3) OR appears to be very slightly faster than ADD. (Though, the difference
// is small enough it might just be measurement noise.) // is small enough it might just be measurement noise.)
// 4) When choosing offsets, there are several contributing factors: // 4) When choosing offsets, there are several contributing factors:
// a) If there's no redzone, we default to TOS. (We could allocate a cache // a) If there's no redzone, we default to TOS. (We could allocate a cache
// line aligned stack object to improve this case.) // line aligned stack object to improve this case.)
// b) To minimize our chances of introducing a false dependence, we prefer // b) To minimize our chances of introducing a false dependence, we prefer
// to offset the stack usage from TOS slightly. // to offset the stack usage from TOS slightly.
// c) To minimize concerns about cross thread stack usage - in particular, // c) To minimize concerns about cross thread stack usage - in particular,
// the idiomatic MyThreadPool.run([&StackVars]() {...}) pattern which // the idiomatic MyThreadPool.run([&StackVars]() {...}) pattern which
// captures state in the TOS frame and accesses it from many threads - // captures state in the TOS frame and accesses it from many threads -
// we want to use an offset such that the offset is in a distinct cache // we want to use an offset such that the offset is in a distinct cache
// line from the TOS frame. // line from the TOS frame.
// //
// For a general discussion of the tradeoffs and benchmark results, see: // For a general discussion of the tradeoffs and benchmark results, see:
// https://shipilev.net/blog/2014/on-the-fence-with-dependencies/ // https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
@ -26772,7 +26773,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
if (Subtarget.hasMFence()) if (Subtarget.hasMFence())
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
SDValue Chain = Op.getOperand(0); SDValue Chain = Op.getOperand(0);
return emitLockedStackOp(DAG, Subtarget, Chain, dl); return emitLockedStackOp(DAG, Subtarget, Chain, dl);
} }
@ -27255,12 +27256,12 @@ static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
// seq_cst which isn't SingleThread, everything just needs to be preserved // seq_cst which isn't SingleThread, everything just needs to be preserved
// during codegen and then dropped. Note that we expect (but don't assume), // during codegen and then dropped. Note that we expect (but don't assume),
// that orderings other than seq_cst and acq_rel have been canonicalized to // that orderings other than seq_cst and acq_rel have been canonicalized to
// a store or load. // a store or load.
if (AN->getOrdering() == AtomicOrdering::SequentiallyConsistent && if (AN->getOrdering() == AtomicOrdering::SequentiallyConsistent &&
AN->getSyncScopeID() == SyncScope::System) { AN->getSyncScopeID() == SyncScope::System) {
// Prefer a locked operation against a stack location to minimize cache // Prefer a locked operation against a stack location to minimize cache
// traffic. This assumes that stack locations are very likely to be // traffic. This assumes that stack locations are very likely to be
// accessed only by the owning thread. // accessed only by the owning thread.
SDValue NewChain = emitLockedStackOp(DAG, Subtarget, Chain, DL); SDValue NewChain = emitLockedStackOp(DAG, Subtarget, Chain, DL);
assert(!N->hasAnyUseOfValue(0)); assert(!N->hasAnyUseOfValue(0));
// NOTE: The getUNDEF is needed to give something for the unused result 0. // NOTE: The getUNDEF is needed to give something for the unused result 0.
@ -32635,7 +32636,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
Res = DAG.getBitcast(ShuffleVT, V1); Res = DAG.getBitcast(ShuffleVT, V1);
Res = DAG.getNode(X86ISD::VPERM2X128, DL, ShuffleVT, Res, Res = DAG.getNode(X86ISD::VPERM2X128, DL, ShuffleVT, Res,
DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT),
DAG.getTargetConstant(PermMask, DL, MVT::i8)); DAG.getConstant(PermMask, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res); return DAG.getBitcast(RootVT, Res);
} }
@ -32742,7 +32743,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return SDValue(); // Nothing to do! return SDValue(); // Nothing to do!
Res = DAG.getBitcast(ShuffleVT, V1); Res = DAG.getBitcast(ShuffleVT, V1);
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res, Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res,
DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); DAG.getConstant(PermuteImm, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res); return DAG.getBitcast(RootVT, Res);
} }
} }
@ -32772,7 +32773,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
NewV1 = DAG.getBitcast(ShuffleVT, NewV1); NewV1 = DAG.getBitcast(ShuffleVT, NewV1);
NewV2 = DAG.getBitcast(ShuffleVT, NewV2); NewV2 = DAG.getBitcast(ShuffleVT, NewV2);
Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2, Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2,
DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); DAG.getConstant(PermuteImm, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res); return DAG.getBitcast(RootVT, Res);
} }
@ -32789,8 +32790,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return SDValue(); // Nothing to do! return SDValue(); // Nothing to do!
V1 = DAG.getBitcast(IntMaskVT, V1); V1 = DAG.getBitcast(IntMaskVT, V1);
Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1, Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1,
DAG.getTargetConstant(BitLen, DL, MVT::i8), DAG.getConstant(BitLen, DL, MVT::i8),
DAG.getTargetConstant(BitIdx, DL, MVT::i8)); DAG.getConstant(BitIdx, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res); return DAG.getBitcast(RootVT, Res);
} }
@ -32800,8 +32801,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
V1 = DAG.getBitcast(IntMaskVT, V1); V1 = DAG.getBitcast(IntMaskVT, V1);
V2 = DAG.getBitcast(IntMaskVT, V2); V2 = DAG.getBitcast(IntMaskVT, V2);
Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2, Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2,
DAG.getTargetConstant(BitLen, DL, MVT::i8), DAG.getConstant(BitLen, DL, MVT::i8),
DAG.getTargetConstant(BitIdx, DL, MVT::i8)); DAG.getConstant(BitIdx, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res); return DAG.getBitcast(RootVT, Res);
} }
} }
@ -32965,7 +32966,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
V2 = DAG.getBitcast(MaskVT, V2); V2 = DAG.getBitcast(MaskVT, V2);
SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, IntMaskVT, DAG, DL, true); SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, IntMaskVT, DAG, DL, true);
Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp, Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp,
DAG.getTargetConstant(M2ZImm, DL, MVT::i8)); DAG.getConstant(M2ZImm, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res); return DAG.getBitcast(RootVT, Res);
} }
@ -33784,7 +33785,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
return DAG.getBitcast( return DAG.getBitcast(
VT, DAG.getNode(X86ISD::BLENDI, DL, SrcVT, N0.getOperand(0), VT, DAG.getNode(X86ISD::BLENDI, DL, SrcVT, N0.getOperand(0),
N1.getOperand(0), N1.getOperand(0),
DAG.getTargetConstant(BlendMask, DL, MVT::i8))); DAG.getConstant(BlendMask, DL, MVT::i8)));
} }
} }
return SDValue(); return SDValue();
@ -33852,12 +33853,12 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
// If we zero out all elements from Op0 then we don't need to reference it. // If we zero out all elements from Op0 then we don't need to reference it.
if (((ZeroMask | (1u << DstIdx)) == 0xF) && !Op0.isUndef()) if (((ZeroMask | (1u << DstIdx)) == 0xF) && !Op0.isUndef())
return DAG.getNode(X86ISD::INSERTPS, DL, VT, DAG.getUNDEF(VT), Op1, return DAG.getNode(X86ISD::INSERTPS, DL, VT, DAG.getUNDEF(VT), Op1,
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); DAG.getConstant(InsertPSMask, DL, MVT::i8));
// If we zero out the element from Op1 then we don't need to reference it. // If we zero out the element from Op1 then we don't need to reference it.
if ((ZeroMask & (1u << DstIdx)) && !Op1.isUndef()) if ((ZeroMask & (1u << DstIdx)) && !Op1.isUndef())
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT), return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT),
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); DAG.getConstant(InsertPSMask, DL, MVT::i8));
// Attempt to merge insertps Op1 with an inner target shuffle node. // Attempt to merge insertps Op1 with an inner target shuffle node.
SmallVector<int, 8> TargetMask1; SmallVector<int, 8> TargetMask1;
@ -33868,14 +33869,14 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
// Zero/UNDEF insertion - zero out element and remove dependency. // Zero/UNDEF insertion - zero out element and remove dependency.
InsertPSMask |= (1u << DstIdx); InsertPSMask |= (1u << DstIdx);
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT), return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT),
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); DAG.getConstant(InsertPSMask, DL, MVT::i8));
} }
// Update insertps mask srcidx and reference the source input directly. // Update insertps mask srcidx and reference the source input directly.
assert(0 <= M && M < 8 && "Shuffle index out of range"); assert(0 <= M && M < 8 && "Shuffle index out of range");
InsertPSMask = (InsertPSMask & 0x3f) | ((M & 0x3) << 6); InsertPSMask = (InsertPSMask & 0x3f) | ((M & 0x3) << 6);
Op1 = Ops1[M < 4 ? 0 : 1]; Op1 = Ops1[M < 4 ? 0 : 1];
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1, return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1,
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); DAG.getConstant(InsertPSMask, DL, MVT::i8));
} }
// Attempt to merge insertps Op0 with an inner target shuffle node. // Attempt to merge insertps Op0 with an inner target shuffle node.
@ -33918,7 +33919,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
if (Updated) if (Updated)
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1, return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1,
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); DAG.getConstant(InsertPSMask, DL, MVT::i8));
} }
// If we're inserting an element from a vbroadcast of a load, fold the // If we're inserting an element from a vbroadcast of a load, fold the
@ -33931,7 +33932,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0,
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
Op1.getOperand(0)), Op1.getOperand(0)),
DAG.getTargetConstant(InsertPSMask & 0x3f, DL, MVT::i8)); DAG.getConstant(InsertPSMask & 0x3f, DL, MVT::i8));
return SDValue(); return SDValue();
} }
@ -34665,7 +34666,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
} }
SDLoc dl(Op); SDLoc dl(Op);
SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8); SDValue NewSA = TLO.DAG.getConstant(Diff, dl, MVT::i8);
return TLO.CombineTo( return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, dl, VT, Src.getOperand(0), NewSA)); Op, TLO.DAG.getNode(Opc, dl, VT, Src.getOperand(0), NewSA));
} }
@ -34704,7 +34705,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
} }
SDLoc dl(Op); SDLoc dl(Op);
SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8); SDValue NewSA = TLO.DAG.getConstant(Diff, dl, MVT::i8);
return TLO.CombineTo( return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, dl, VT, Src.getOperand(0), NewSA)); Op, TLO.DAG.getNode(Opc, dl, VT, Src.getOperand(0), NewSA));
} }
@ -35753,8 +35754,8 @@ static SDValue createMMXBuildVector(BuildVectorSDNode *BV, SelectionDAG &DAG,
unsigned ShufMask = (NumElts > 2 ? 0 : 0x44); unsigned ShufMask = (NumElts > 2 ? 0 : 0x44);
return DAG.getNode( return DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx, ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx,
DAG.getTargetConstant(Intrinsic::x86_sse_pshuf_w, DL, MVT::i32), DAG.getConstant(Intrinsic::x86_sse_pshuf_w, DL, MVT::i32), Splat,
Splat, DAG.getTargetConstant(ShufMask, DL, MVT::i8)); DAG.getConstant(ShufMask, DL, MVT::i8));
} }
Ops.append(NumElts, Splat); Ops.append(NumElts, Splat);
} else { } else {
@ -36510,7 +36511,7 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
} }
// TODO: This switch could include FNEG and the x86-specific FP logic ops // TODO: This switch could include FNEG and the x86-specific FP logic ops
// (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid
// missed load folding and fma+fneg combining. // missed load folding and fma+fneg combining.
switch (Vec.getOpcode()) { switch (Vec.getOpcode()) {
case ISD::FMA: // Begin 3 operands case ISD::FMA: // Begin 3 operands
@ -38911,7 +38912,7 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
if (NewShiftVal >= NumBitsPerElt) if (NewShiftVal >= NumBitsPerElt)
NewShiftVal = NumBitsPerElt - 1; NewShiftVal = NumBitsPerElt - 1;
return DAG.getNode(X86ISD::VSRAI, SDLoc(N), VT, N0.getOperand(0), return DAG.getNode(X86ISD::VSRAI, SDLoc(N), VT, N0.getOperand(0),
DAG.getTargetConstant(NewShiftVal, SDLoc(N), MVT::i8)); DAG.getConstant(NewShiftVal, SDLoc(N), MVT::i8));
} }
// We can decode 'whole byte' logical bit shifts as shuffles. // We can decode 'whole byte' logical bit shifts as shuffles.
@ -39031,7 +39032,7 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
if (Subtarget.hasAVX512()) { if (Subtarget.hasAVX512()) {
SDValue FSetCC = SDValue FSetCC =
DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01, DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01,
DAG.getTargetConstant(x86cc, DL, MVT::i8)); DAG.getConstant(x86cc, DL, MVT::i8));
// Need to fill with zeros to ensure the bitcast will produce zeroes // Need to fill with zeros to ensure the bitcast will produce zeroes
// for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that. // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that.
SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v16i1, SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v16i1,
@ -39040,9 +39041,10 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
return DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Ins), DL, return DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Ins), DL,
N->getSimpleValueType(0)); N->getSimpleValueType(0));
} }
SDValue OnesOrZeroesF = SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL,
DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00, CMP00.getValueType(), CMP00, CMP01,
CMP01, DAG.getTargetConstant(x86cc, DL, MVT::i8)); DAG.getConstant(x86cc, DL,
MVT::i8));
bool is64BitFP = (CMP00.getValueType() == MVT::f64); bool is64BitFP = (CMP00.getValueType() == MVT::f64);
MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32; MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32;
@ -39236,7 +39238,7 @@ static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
SDLoc DL(N); SDLoc DL(N);
unsigned ShiftVal = SplatVal.countTrailingOnes(); unsigned ShiftVal = SplatVal.countTrailingOnes();
SDValue ShAmt = DAG.getTargetConstant(EltBitWidth - ShiftVal, DL, MVT::i8); SDValue ShAmt = DAG.getConstant(EltBitWidth - ShiftVal, DL, MVT::i8);
SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT0, Op0, ShAmt); SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT0, Op0, ShAmt);
return DAG.getBitcast(N->getValueType(0), Shift); return DAG.getBitcast(N->getValueType(0), Shift);
} }
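Almost every hunk in this file makes the same mechanical swap between the two SelectionDAG constant builders. As a quick orientation aid only (this sketch is not part of the commit), here is the shape of the change, assuming a SelectionDAG &DAG, an SDLoc dl, a result type VT, an operand SDValue Src, and an immediate value ShiftAmt are already in scope; the calls themselves (DAG.getConstant, DAG.getTargetConstant, DAG.getNode with X86ISD::VSRAI) are taken directly from the hunks above:

  // Post-revert form: the immediate operand is built as an ordinary constant node.
  SDValue ShAmt = DAG.getConstant(ShiftAmt, dl, MVT::i8);
  SDValue Shift = DAG.getNode(X86ISD::VSRAI, dl, VT, Src, ShAmt);

  // Pre-revert form (the code being reverted): the same immediate was built
  // as a target constant instead.
  SDValue ShAmtTC = DAG.getTargetConstant(ShiftAmt, dl, MVT::i8);
  SDValue ShiftTC = DAG.getNode(X86ISD::VSRAI, dl, VT, Src, ShAmtTC);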
View File
@ -753,14 +753,14 @@ let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2, u8imm:$src3), (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>, [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>; EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
(ins VR128X:$src1, f32mem:$src2, u8imm:$src3), (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1, [(set VR128X:$dst, (X86insertps VR128X:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))), (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
timm:$src3))]>, imm:$src3))]>,
EVEX_4V, EVEX_CD8<32, CD8VT1>, EVEX_4V, EVEX_CD8<32, CD8VT1>,
Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>; Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
} }
@ -2054,9 +2054,9 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix, "vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc", "$cc, $src2, $src1", "$src1, $src2, $cc",
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
(OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>; imm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>;
let mayLoad = 1 in let mayLoad = 1 in
defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst), (outs _.KRC:$dst),
@ -2064,9 +2064,9 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
"vcmp"#_.Suffix, "vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc", "$cc, $src2, $src1", "$src1, $src2, $cc",
(OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2, (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
timm:$cc), imm:$cc),
(OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2, (OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, imm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
@ -2075,9 +2075,9 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
"vcmp"#_.Suffix, "vcmp"#_.Suffix,
"$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc", "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
(OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
timm:$cc), imm:$cc),
(OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
timm:$cc)>, imm:$cc)>,
EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>; EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
let isCodeGenOnly = 1 in { let isCodeGenOnly = 1 in {
@ -2088,7 +2088,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (OpNode _.FRC:$src1, [(set _.KRC:$dst, (OpNode _.FRC:$src1,
_.FRC:$src2, _.FRC:$src2,
timm:$cc))]>, imm:$cc))]>,
EVEX_4V, VEX_LIG, Sched<[sched]>; EVEX_4V, VEX_LIG, Sched<[sched]>;
def rm : AVX512Ii8<0xC2, MRMSrcMem, def rm : AVX512Ii8<0xC2, MRMSrcMem,
(outs _.KRC:$dst), (outs _.KRC:$dst),
@ -2097,7 +2097,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (OpNode _.FRC:$src1, [(set _.KRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2), (_.ScalarLdFrag addr:$src2),
timm:$cc))]>, imm:$cc))]>,
EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
} }
@ -2530,8 +2530,8 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc), (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
"vcmp"#_.Suffix, "vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc", "$cc, $src2, $src1", "$src1, $src2, $cc",
(X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
(X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
1>, Sched<[sched]>; 1>, Sched<[sched]>;
defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
@ -2539,9 +2539,9 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
"vcmp"#_.Suffix, "vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc", "$cc, $src2, $src1", "$src1, $src2, $cc",
(X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
timm:$cc), imm:$cc),
(X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
timm:$cc)>, imm:$cc)>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
@ -2552,10 +2552,10 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
"$src1, ${src2}"#_.BroadcastStr#", $cc", "$src1, ${src2}"#_.BroadcastStr#", $cc",
(X86cmpm (_.VT _.RC:$src1), (X86cmpm (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
timm:$cc), imm:$cc),
(X86cmpm_su (_.VT _.RC:$src1), (X86cmpm_su (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
timm:$cc)>, imm:$cc)>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Patterns for selecting with loads in other operand. // Patterns for selecting with loads in other operand.
@ -2592,9 +2592,9 @@ multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
"vcmp"#_.Suffix, "vcmp"#_.Suffix,
"$cc, {sae}, $src2, $src1", "$cc, {sae}, $src2, $src1",
"$src1, $src2, {sae}, $cc", "$src1, $src2, {sae}, $cc",
(X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
(X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
timm:$cc)>, imm:$cc)>,
EVEX_B, Sched<[sched]>; EVEX_B, Sched<[sched]>;
} }
@ -2649,7 +2649,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
(ins _.RC:$src1, i32u8imm:$src2), (ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1), [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
(i32 timm:$src2)))]>, (i32 imm:$src2)))]>,
Sched<[sched]>; Sched<[sched]>;
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
@ -2657,7 +2657,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask, [(set _.KRC:$dst,(and _.KRCWM:$mask,
(X86Vfpclasss_su (_.VT _.RC:$src1), (X86Vfpclasss_su (_.VT _.RC:$src1),
(i32 timm:$src2))))]>, (i32 imm:$src2))))]>,
EVEX_K, Sched<[sched]>; EVEX_K, Sched<[sched]>;
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.IntScalarMemOp:$src1, i32u8imm:$src2), (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
@ -2665,7 +2665,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}", "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst, [(set _.KRC:$dst,
(X86Vfpclasss _.ScalarIntMemCPat:$src1, (X86Vfpclasss _.ScalarIntMemCPat:$src1,
(i32 timm:$src2)))]>, (i32 imm:$src2)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2), (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
@ -2673,7 +2673,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask, [(set _.KRC:$dst,(and _.KRCWM:$mask,
(X86Vfpclasss_su _.ScalarIntMemCPat:$src1, (X86Vfpclasss_su _.ScalarIntMemCPat:$src1,
(i32 timm:$src2))))]>, (i32 imm:$src2))))]>,
EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
} }
} }
@ -2689,7 +2689,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
(ins _.RC:$src1, i32u8imm:$src2), (ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1), [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
(i32 timm:$src2)))]>, (i32 imm:$src2)))]>,
Sched<[sched]>; Sched<[sched]>;
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
@ -2697,7 +2697,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask, [(set _.KRC:$dst,(and _.KRCWM:$mask,
(X86Vfpclass_su (_.VT _.RC:$src1), (X86Vfpclass_su (_.VT _.RC:$src1),
(i32 timm:$src2))))]>, (i32 imm:$src2))))]>,
EVEX_K, Sched<[sched]>; EVEX_K, Sched<[sched]>;
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2), (ins _.MemOp:$src1, i32u8imm:$src2),
@ -2705,7 +2705,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}", "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(X86Vfpclass [(set _.KRC:$dst,(X86Vfpclass
(_.VT (_.LdFrag addr:$src1)), (_.VT (_.LdFrag addr:$src1)),
(i32 timm:$src2)))]>, (i32 imm:$src2)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2), (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
@ -2713,7 +2713,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
(_.VT (_.LdFrag addr:$src1)), (_.VT (_.LdFrag addr:$src1)),
(i32 timm:$src2))))]>, (i32 imm:$src2))))]>,
EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2), (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
@ -2723,7 +2723,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
[(set _.KRC:$dst,(X86Vfpclass [(set _.KRC:$dst,(X86Vfpclass
(_.VT (X86VBroadcast (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src1))), (_.ScalarLdFrag addr:$src1))),
(i32 timm:$src2)))]>, (i32 imm:$src2)))]>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
@ -2733,7 +2733,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
[(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
(_.VT (X86VBroadcast (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src1))), (_.ScalarLdFrag addr:$src1))),
(i32 timm:$src2))))]>, (i32 imm:$src2))))]>,
EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
} }
@ -3111,7 +3111,7 @@ multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm), def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"\t{$imm, $src, $dst|$dst, $src, $imm}"), "\t{$imm, $src, $dst|$dst, $src, $imm}"),
[(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>, [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>,
Sched<[sched]>; Sched<[sched]>;
} }
@ -3187,7 +3187,7 @@ multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, PatFrag OpNode_su
X86VectorVTInfo Narrow, X86VectorVTInfo Narrow,
X86VectorVTInfo Wide> { X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1), def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
(Narrow.VT Narrow.RC:$src2), timm:$cc)), (Narrow.VT Narrow.RC:$src2), imm:$cc)),
(COPY_TO_REGCLASS (COPY_TO_REGCLASS
(!cast<Instruction>(InstStr##Zrri) (!cast<Instruction>(InstStr##Zrri)
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
@ -3196,7 +3196,7 @@ def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
(OpNode_su (Narrow.VT Narrow.RC:$src1), (OpNode_su (Narrow.VT Narrow.RC:$src1),
(Narrow.VT Narrow.RC:$src2), timm:$cc))), (Narrow.VT Narrow.RC:$src2), imm:$cc))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik) (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
(COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
@ -5787,13 +5787,13 @@ multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst), defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
(ins _.RC:$src1, u8imm:$src2), OpcodeStr, (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2", "$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>, (_.VT (OpNode _.RC:$src1, (i8 imm:$src2)))>,
Sched<[sched]>; Sched<[sched]>;
defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.MemOp:$src1, u8imm:$src2), OpcodeStr, (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2", "$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT (_.LdFrag addr:$src1)), (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
(i8 timm:$src2)))>, (i8 imm:$src2)))>,
Sched<[sched.Folded]>; Sched<[sched.Folded]>;
} }
} }
@ -5805,7 +5805,7 @@ multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr, (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2", "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 timm:$src2)))>, (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2)))>,
EVEX_B, Sched<[sched.Folded]>; EVEX_B, Sched<[sched.Folded]>;
} }
@ -5947,13 +5947,13 @@ let Predicates = [HasAVX512, NoVLX] in {
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
VR128X:$src2)), sub_xmm)>; VR128X:$src2)), sub_xmm)>;
def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))), def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v8i64 (EXTRACT_SUBREG (v8i64
(VPSRAQZri (VPSRAQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
imm:$src2)), sub_ymm)>; imm:$src2)), sub_ymm)>;
def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))), def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v8i64 (EXTRACT_SUBREG (v8i64
(VPSRAQZri (VPSRAQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
@ -6098,23 +6098,23 @@ let Predicates = [HasAVX512, NoVLX] in {
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
sub_ymm)>; sub_ymm)>;
def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))), def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v8i64 (EXTRACT_SUBREG (v8i64
(VPROLQZri (VPROLQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
imm:$src2)), sub_xmm)>; imm:$src2)), sub_xmm)>;
def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))), def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v8i64 (EXTRACT_SUBREG (v8i64
(VPROLQZri (VPROLQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
imm:$src2)), sub_ymm)>; imm:$src2)), sub_ymm)>;
def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))), def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v16i32 (EXTRACT_SUBREG (v16i32
(VPROLDZri (VPROLDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
imm:$src2)), sub_xmm)>; imm:$src2)), sub_xmm)>;
def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))), def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v16i32 (EXTRACT_SUBREG (v16i32
(VPROLDZri (VPROLDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
@ -6149,23 +6149,23 @@ let Predicates = [HasAVX512, NoVLX] in {
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
sub_ymm)>; sub_ymm)>;
def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))), def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v8i64 (EXTRACT_SUBREG (v8i64
(VPRORQZri (VPRORQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
imm:$src2)), sub_xmm)>; imm:$src2)), sub_xmm)>;
def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))), def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v8i64 (EXTRACT_SUBREG (v8i64
(VPRORQZri (VPRORQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
imm:$src2)), sub_ymm)>; imm:$src2)), sub_ymm)>;
def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))), def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v16i32 (EXTRACT_SUBREG (v16i32
(VPRORDZri (VPRORDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
imm:$src2)), sub_xmm)>; imm:$src2)), sub_xmm)>;
def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))), def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v16i32 (EXTRACT_SUBREG (v16i32
(VPRORDZri (VPRORDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
@ -8612,21 +8612,21 @@ let ExeDomain = GenericDomain in {
(ins _src.RC:$src1, i32u8imm:$src2), (ins _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _dest.RC:$dst, [(set _dest.RC:$dst,
(X86cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>, (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)))]>,
Sched<[RR]>; Sched<[RR]>;
let Constraints = "$src0 = $dst" in let Constraints = "$src0 = $dst" in
def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
(ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _dest.RC:$dst, [(set _dest.RC:$dst,
(X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
_dest.RC:$src0, _src.KRCWM:$mask))]>, _dest.RC:$src0, _src.KRCWM:$mask))]>,
Sched<[RR]>, EVEX_K; Sched<[RR]>, EVEX_K;
def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
(ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}", "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
[(set _dest.RC:$dst, [(set _dest.RC:$dst,
(X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
_dest.ImmAllZerosV, _src.KRCWM:$mask))]>, _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
Sched<[RR]>, EVEX_KZ; Sched<[RR]>, EVEX_KZ;
let hasSideEffects = 0, mayStore = 1 in { let hasSideEffects = 0, mayStore = 1 in {
@ -9085,14 +9085,14 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr, (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3", "$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 timm:$src3)))>, (i32 imm:$src3)))>,
Sched<[sched]>; Sched<[sched]>;
defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr, (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3", "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
(_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 timm:$src3)))>, EVEX_B, (i32 imm:$src3)))>, EVEX_B,
Sched<[sched]>; Sched<[sched]>;
defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
@ -9100,7 +9100,7 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
OpcodeStr, OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3", "$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales _.RC:$src1, (_.VT (X86RndScales _.RC:$src1,
_.ScalarIntMemCPat:$src2, (i32 timm:$src3)))>, _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in { let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
@ -9118,13 +9118,13 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
} }
let Predicates = [HasAVX512] in { let Predicates = [HasAVX512] in {
def : Pat<(X86VRndScale _.FRC:$src1, timm:$src2), def : Pat<(X86VRndScale _.FRC:$src1, imm:$src2),
(_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)), (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
_.FRC:$src1, imm:$src2))>; _.FRC:$src1, imm:$src2))>;
} }
let Predicates = [HasAVX512, OptForSize] in { let Predicates = [HasAVX512, OptForSize] in {
def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2), def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), imm:$src2),
(_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)), (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
addr:$src1, imm:$src2))>; addr:$src1, imm:$src2))>;
} }
@ -10145,19 +10145,19 @@ multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNo
(ins _.RC:$src1, i32u8imm:$src2), (ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), (OpNode (_.VT _.RC:$src1),
(i32 timm:$src2))>, Sched<[sched]>; (i32 imm:$src2))>, Sched<[sched]>;
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2), (ins _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 timm:$src2))>, (i32 imm:$src2))>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2), (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr, OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
"${src1}"##_.BroadcastStr##", $src2", "${src1}"##_.BroadcastStr##", $src2",
(OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))), (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
(i32 timm:$src2))>, EVEX_B, (i32 imm:$src2))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
} }
} }
@ -10172,7 +10172,7 @@ multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
OpcodeStr##_.Suffix, "$src2, {sae}, $src1", OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
"$src1, {sae}, $src2", "$src1, {sae}, $src2",
(OpNode (_.VT _.RC:$src1), (OpNode (_.VT _.RC:$src1),
(i32 timm:$src2))>, (i32 imm:$src2))>,
EVEX_B, Sched<[sched]>; EVEX_B, Sched<[sched]>;
} }
@ -10205,14 +10205,14 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1), (OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2), (_.VT _.RC:$src2),
(i32 timm:$src3))>, (i32 imm:$src3))>,
Sched<[sched]>; Sched<[sched]>;
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3), (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1), (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))), (_.VT (bitconvert (_.LdFrag addr:$src2))),
(i32 timm:$src3))>, (i32 imm:$src3))>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
@ -10220,7 +10220,7 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src1, ${src2}"##_.BroadcastStr##", $src3", "$src1, ${src2}"##_.BroadcastStr##", $src3",
(OpNode (_.VT _.RC:$src1), (OpNode (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i32 timm:$src3))>, EVEX_B, (i32 imm:$src3))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
} }
} }
@ -10236,7 +10236,7 @@ multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1), (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
(SrcInfo.VT SrcInfo.RC:$src2), (SrcInfo.VT SrcInfo.RC:$src2),
(i8 timm:$src3)))>, (i8 imm:$src3)))>,
Sched<[sched]>; Sched<[sched]>;
defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst), defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3), (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
@ -10244,7 +10244,7 @@ multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1), (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
(SrcInfo.VT (bitconvert (SrcInfo.VT (bitconvert
(SrcInfo.LdFrag addr:$src2))), (SrcInfo.LdFrag addr:$src2))),
(i8 timm:$src3)))>, (i8 imm:$src3)))>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
} }
} }
@ -10263,7 +10263,7 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src1, ${src2}"##_.BroadcastStr##", $src3", "$src1, ${src2}"##_.BroadcastStr##", $src3",
(OpNode (_.VT _.RC:$src1), (OpNode (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i8 timm:$src3))>, EVEX_B, (i8 imm:$src3))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
} }
@ -10277,7 +10277,7 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1), (OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2), (_.VT _.RC:$src2),
(i32 timm:$src3))>, (i32 imm:$src3))>,
Sched<[sched]>; Sched<[sched]>;
defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
@ -10301,7 +10301,7 @@ multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
"$src1, $src2, {sae}, $src3", "$src1, $src2, {sae}, $src3",
(OpNode (_.VT _.RC:$src1), (OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2), (_.VT _.RC:$src2),
(i32 timm:$src3))>, (i32 imm:$src3))>,
EVEX_B, Sched<[sched]>; EVEX_B, Sched<[sched]>;
} }
@ -10315,7 +10315,7 @@ multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode
"$src1, $src2, {sae}, $src3", "$src1, $src2, {sae}, $src3",
(OpNode (_.VT _.RC:$src1), (OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2), (_.VT _.RC:$src2),
(i32 timm:$src3))>, (i32 imm:$src3))>,
EVEX_B, Sched<[sched]>; EVEX_B, Sched<[sched]>;
} }
@ -10437,7 +10437,7 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (bitconvert (_.VT (bitconvert
(CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2, (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
(i8 timm:$src3)))))>, (i8 imm:$src3)))))>,
Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">; Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
@ -10446,7 +10446,7 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
(bitconvert (bitconvert
(CastInfo.VT (X86Shuf128 _.RC:$src1, (CastInfo.VT (X86Shuf128 _.RC:$src1,
(CastInfo.LdFrag addr:$src2), (CastInfo.LdFrag addr:$src2),
(i8 timm:$src3)))))>, (i8 imm:$src3)))))>,
Sched<[sched.Folded, sched.ReadAfterFold]>, Sched<[sched.Folded, sched.ReadAfterFold]>,
EVEX2VEXOverride<EVEX2VEXOvrd#"rm">; EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
@ -10458,7 +10458,7 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
(CastInfo.VT (CastInfo.VT
(X86Shuf128 _.RC:$src1, (X86Shuf128 _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src2)), (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
(i8 timm:$src3)))))>, EVEX_B, (i8 imm:$src3)))))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
} }
} }
@ -10527,14 +10527,14 @@ multiclass avx512_valign<bits<8> opc, string OpcodeStr,
defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$src3), (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>, (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$src3)))>,
Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">; Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86VAlign _.RC:$src1, (_.VT (X86VAlign _.RC:$src1,
(bitconvert (_.LdFrag addr:$src2)), (bitconvert (_.LdFrag addr:$src2)),
(i8 timm:$src3)))>, (i8 imm:$src3)))>,
Sched<[sched.Folded, sched.ReadAfterFold]>, Sched<[sched.Folded, sched.ReadAfterFold]>,
EVEX2VEXOverride<"VPALIGNRrmi">; EVEX2VEXOverride<"VPALIGNRrmi">;
@ -10544,7 +10544,7 @@ multiclass avx512_valign<bits<8> opc, string OpcodeStr,
"$src1, ${src2}"##_.BroadcastStr##", $src3", "$src1, ${src2}"##_.BroadcastStr##", $src3",
(X86VAlign _.RC:$src1, (X86VAlign _.RC:$src1,
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i8 timm:$src3))>, EVEX_B, (i8 imm:$src3))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
} }
} }
@ -10593,7 +10593,7 @@ multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
def : Pat<(To.VT (vselect To.KRCWM:$mask, def : Pat<(To.VT (vselect To.KRCWM:$mask,
(bitconvert (bitconvert
(From.VT (OpNode From.RC:$src1, From.RC:$src2, (From.VT (OpNode From.RC:$src1, From.RC:$src2,
timm:$src3))), imm:$src3))),
To.RC:$src0)), To.RC:$src0)),
(!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask, (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
To.RC:$src1, To.RC:$src2, To.RC:$src1, To.RC:$src2,
@ -10602,7 +10602,7 @@ multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
def : Pat<(To.VT (vselect To.KRCWM:$mask, def : Pat<(To.VT (vselect To.KRCWM:$mask,
(bitconvert (bitconvert
(From.VT (OpNode From.RC:$src1, From.RC:$src2, (From.VT (OpNode From.RC:$src1, From.RC:$src2,
timm:$src3))), imm:$src3))),
To.ImmAllZerosV)), To.ImmAllZerosV)),
(!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask, (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
To.RC:$src1, To.RC:$src2, To.RC:$src1, To.RC:$src2,
@ -10612,7 +10612,7 @@ multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
(bitconvert (bitconvert
(From.VT (OpNode From.RC:$src1, (From.VT (OpNode From.RC:$src1,
(From.LdFrag addr:$src2), (From.LdFrag addr:$src2),
timm:$src3))), imm:$src3))),
To.RC:$src0)), To.RC:$src0)),
(!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask, (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
To.RC:$src1, addr:$src2, To.RC:$src1, addr:$src2,
@ -10622,7 +10622,7 @@ multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
(bitconvert (bitconvert
(From.VT (OpNode From.RC:$src1, (From.VT (OpNode From.RC:$src1,
(From.LdFrag addr:$src2), (From.LdFrag addr:$src2),
timm:$src3))), imm:$src3))),
To.ImmAllZerosV)), To.ImmAllZerosV)),
(!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask, (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
To.RC:$src1, addr:$src2, To.RC:$src1, addr:$src2,
@ -10637,7 +10637,7 @@ multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
def : Pat<(From.VT (OpNode From.RC:$src1, def : Pat<(From.VT (OpNode From.RC:$src1,
(bitconvert (To.VT (X86VBroadcast (bitconvert (To.VT (X86VBroadcast
(To.ScalarLdFrag addr:$src2)))), (To.ScalarLdFrag addr:$src2)))),
timm:$src3)), imm:$src3)),
(!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2, (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
(ImmXForm imm:$src3))>; (ImmXForm imm:$src3))>;
@ -10647,7 +10647,7 @@ multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
(bitconvert (bitconvert
(To.VT (X86VBroadcast (To.VT (X86VBroadcast
(To.ScalarLdFrag addr:$src2)))), (To.ScalarLdFrag addr:$src2)))),
timm:$src3))), imm:$src3))),
To.RC:$src0)), To.RC:$src0)),
(!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask, (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
To.RC:$src1, addr:$src2, To.RC:$src1, addr:$src2,
@ -10659,7 +10659,7 @@ multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
(bitconvert (bitconvert
(To.VT (X86VBroadcast (To.VT (X86VBroadcast
(To.ScalarLdFrag addr:$src2)))), (To.ScalarLdFrag addr:$src2)))),
timm:$src3))), imm:$src3))),
To.ImmAllZerosV)), To.ImmAllZerosV)),
(!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask, (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
To.RC:$src1, addr:$src2, To.RC:$src1, addr:$src2,
@ -11103,14 +11103,14 @@ multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
def rr : AVX512<opc, MRMr, def rr : AVX512<opc, MRMr,
(outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2), (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>, [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
Sched<[sched]>; Sched<[sched]>;
def rm : AVX512<opc, MRMm, def rm : AVX512<opc, MRMm,
(outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2), (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,(_.VT (OpNode [(set _.RC:$dst,(_.VT (OpNode
(_.VT (bitconvert (_.LdFrag addr:$src1))), (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i8 timm:$src2))))]>, (i8 imm:$src2))))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
} }
@ -11243,7 +11243,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode (_.VT _.RC:$src1), (OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2), (_.VT _.RC:$src2),
(_.VT _.RC:$src3), (_.VT _.RC:$src3),
(i8 timm:$src4)), 1, 1>, (i8 imm:$src4)), 1, 1>,
AVX512AIi8Base, EVEX_4V, Sched<[sched]>; AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4), (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
@ -11251,7 +11251,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode (_.VT _.RC:$src1), (OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2), (_.VT _.RC:$src2),
(_.VT (bitconvert (_.LdFrag addr:$src3))), (_.VT (bitconvert (_.LdFrag addr:$src3))),
(i8 timm:$src4)), 1, 0>, (i8 imm:$src4)), 1, 0>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
@ -11261,31 +11261,31 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode (_.VT _.RC:$src1), (OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2), (_.VT _.RC:$src2),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
(i8 timm:$src4)), 1, 0>, EVEX_B, (i8 imm:$src4)), 1, 0>, EVEX_B,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
}// Constraints = "$src1 = $dst" }// Constraints = "$src1 = $dst"
// Additional patterns for matching passthru operand in other positions. // Additional patterns for matching passthru operand in other positions.
def : Pat<(_.VT (vselect _.KRCWM:$mask, def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
_.RC:$src1)), _.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>; _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)), (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
_.RC:$src1)), _.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>; _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
// Additional patterns for matching loads in other positions. // Additional patterns for matching loads in other positions.
def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)), def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4))), _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
(!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2, (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (OpNode _.RC:$src1, def : Pat<(_.VT (OpNode _.RC:$src1,
(bitconvert (_.LdFrag addr:$src3)), (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, (i8 timm:$src4))), _.RC:$src2, (i8 imm:$src4))),
(!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2, (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
@ -11293,13 +11293,13 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
// positions. // positions.
def : Pat<(_.VT (vselect _.KRCWM:$mask, def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)), (OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
_.ImmAllZerosV)), _.ImmAllZerosV)),
(!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, (i8 timm:$src4)), _.RC:$src2, (i8 imm:$src4)),
_.ImmAllZerosV)), _.ImmAllZerosV)),
(!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
@ -11308,43 +11308,43 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
// operand orders. // operand orders.
def : Pat<(_.VT (vselect _.KRCWM:$mask, def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, (i8 timm:$src4)), _.RC:$src2, (i8 imm:$src4)),
_.RC:$src1)), _.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)), (OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
_.RC:$src1)), _.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2, _.RC:$src1, (OpNode _.RC:$src2, _.RC:$src1,
(bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)), (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
_.RC:$src1)), _.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>; _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)), (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src1, (i8 timm:$src4)), _.RC:$src1, (i8 imm:$src4)),
_.RC:$src1)), _.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>; _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)), (OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src1, _.RC:$src2, (i8 timm:$src4)), _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
_.RC:$src1)), _.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>; _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
// Additional patterns for matching broadcasts in other positions. // Additional patterns for matching broadcasts in other positions.
def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4))), _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
(!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2, (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (OpNode _.RC:$src1, def : Pat<(_.VT (OpNode _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src3)), (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, (i8 timm:$src4))), _.RC:$src2, (i8 imm:$src4))),
(!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2, (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
@ -11352,7 +11352,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
// positions. // positions.
def : Pat<(_.VT (vselect _.KRCWM:$mask, def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
_.ImmAllZerosV)), _.ImmAllZerosV)),
(!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1, (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
_.KRCWM:$mask, _.RC:$src2, addr:$src3, _.KRCWM:$mask, _.RC:$src2, addr:$src3,
@ -11360,7 +11360,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
def : Pat<(_.VT (vselect _.KRCWM:$mask, def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src1, (OpNode _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src3)), (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, (i8 timm:$src4)), _.RC:$src2, (i8 imm:$src4)),
_.ImmAllZerosV)), _.ImmAllZerosV)),
(!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1, (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
_.KRCWM:$mask, _.RC:$src2, addr:$src3, _.KRCWM:$mask, _.RC:$src2, addr:$src3,
@ -11371,32 +11371,32 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
def : Pat<(_.VT (vselect _.KRCWM:$mask, def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src1, (OpNode _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src3)), (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, (i8 timm:$src4)), _.RC:$src2, (i8 imm:$src4)),
_.RC:$src1)), _.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
_.RC:$src1)), _.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2, _.RC:$src1, (OpNode _.RC:$src2, _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src3)), (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
(i8 timm:$src4)), _.RC:$src1)), (i8 imm:$src4)), _.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>; _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2, (OpNode _.RC:$src2,
(X86VBroadcast (_.ScalarLdFrag addr:$src3)), (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src1, (i8 timm:$src4)), _.RC:$src1, (i8 imm:$src4)),
_.RC:$src1)), _.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>; _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask, def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src1, _.RC:$src2, (i8 timm:$src4)), _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
_.RC:$src1)), _.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>; _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
@ -11531,14 +11531,14 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
(X86VFixupimm (_.VT _.RC:$src1), (X86VFixupimm (_.VT _.RC:$src1),
(_.VT _.RC:$src2), (_.VT _.RC:$src2),
(TblVT.VT _.RC:$src3), (TblVT.VT _.RC:$src3),
(i32 timm:$src4))>, Sched<[sched]>; (i32 imm:$src4))>, Sched<[sched]>;
defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4), (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(X86VFixupimm (_.VT _.RC:$src1), (X86VFixupimm (_.VT _.RC:$src1),
(_.VT _.RC:$src2), (_.VT _.RC:$src2),
(TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))), (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
(i32 timm:$src4))>, (i32 imm:$src4))>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
@ -11547,7 +11547,7 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
(X86VFixupimm (_.VT _.RC:$src1), (X86VFixupimm (_.VT _.RC:$src1),
(_.VT _.RC:$src2), (_.VT _.RC:$src2),
(TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))), (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
(i32 timm:$src4))>, (i32 imm:$src4))>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // Constraints = "$src1 = $dst" } // Constraints = "$src1 = $dst"
} }
@ -11564,7 +11564,7 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
(X86VFixupimmSAE (_.VT _.RC:$src1), (X86VFixupimmSAE (_.VT _.RC:$src1),
(_.VT _.RC:$src2), (_.VT _.RC:$src2),
(TblVT.VT _.RC:$src3), (TblVT.VT _.RC:$src3),
(i32 timm:$src4))>, (i32 imm:$src4))>,
EVEX_B, Sched<[sched]>; EVEX_B, Sched<[sched]>;
} }
} }
@ -11580,7 +11580,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
(X86VFixupimms (_.VT _.RC:$src1), (X86VFixupimms (_.VT _.RC:$src1),
(_.VT _.RC:$src2), (_.VT _.RC:$src2),
(_src3VT.VT _src3VT.RC:$src3), (_src3VT.VT _src3VT.RC:$src3),
(i32 timm:$src4))>, Sched<[sched]>; (i32 imm:$src4))>, Sched<[sched]>;
defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2", OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
@ -11588,7 +11588,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
(X86VFixupimmSAEs (_.VT _.RC:$src1), (X86VFixupimmSAEs (_.VT _.RC:$src1),
(_.VT _.RC:$src2), (_.VT _.RC:$src2),
(_src3VT.VT _src3VT.RC:$src3), (_src3VT.VT _src3VT.RC:$src3),
(i32 timm:$src4))>, (i32 imm:$src4))>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
@ -11597,13 +11597,13 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
(_.VT _.RC:$src2), (_.VT _.RC:$src2),
(_src3VT.VT (scalar_to_vector (_src3VT.VT (scalar_to_vector
(_src3VT.ScalarLdFrag addr:$src3))), (_src3VT.ScalarLdFrag addr:$src3))),
(i32 timm:$src4))>, (i32 imm:$src4))>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
} }
} }
multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched, multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _Vec, AVX512VLVectorVTInfo _Vec,
AVX512VLVectorVTInfo _Tbl> { AVX512VLVectorVTInfo _Tbl> {
let Predicates = [HasAVX512] in let Predicates = [HasAVX512] in
defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM, defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
@ -12072,7 +12072,7 @@ multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
"$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3", "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
(OpNode (VTI.VT VTI.RC:$src1), (OpNode (VTI.VT VTI.RC:$src1),
(bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))), (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
(i8 timm:$src3))>, EVEX_B, (i8 imm:$src3))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
} }


@ -114,13 +114,13 @@ multiclass ssse3_palign_mm<string asm, Intrinsic IntId,
def rri : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), def rri : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2, u8imm:$src3), (ins VR64:$src1, VR64:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 timm:$src3)))]>, [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>,
Sched<[sched]>; Sched<[sched]>;
def rmi : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), def rmi : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2, u8imm:$src3), (ins VR64:$src1, i64mem:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1, [(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2)), (i8 timm:$src3)))]>, (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
} }
@ -496,14 +496,14 @@ def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, u8imm:$src2), (outs VR64:$dst), (ins VR64:$src1, u8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst, [(set VR64:$dst,
(int_x86_sse_pshuf_w VR64:$src1, timm:$src2))]>, (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>,
Sched<[SchedWriteShuffle.MMX]>; Sched<[SchedWriteShuffle.MMX]>;
def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem, def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
(outs VR64:$dst), (ins i64mem:$src1, u8imm:$src2), (outs VR64:$dst), (ins i64mem:$src1, u8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst, [(set VR64:$dst,
(int_x86_sse_pshuf_w (load_mmx addr:$src1), (int_x86_sse_pshuf_w (load_mmx addr:$src1),
timm:$src2))]>, imm:$src2))]>,
Sched<[SchedWriteShuffle.MMX.Folded]>; Sched<[SchedWriteShuffle.MMX.Folded]>;
// -- Conversion Instructions // -- Conversion Instructions


@ -370,7 +370,7 @@ defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups", defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups",
SSEPackedSingle, SchedWriteFMoveLS.YMM>, SSEPackedSingle, SchedWriteFMoveLS.YMM>,
PS, VEX, VEX_L, VEX_WIG; PS, VEX, VEX_L, VEX_WIG;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd", defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd",
SSEPackedDouble, SchedWriteFMoveLS.YMM>, SSEPackedDouble, SchedWriteFMoveLS.YMM>,
PD, VEX, VEX_L, VEX_WIG; PD, VEX, VEX_L, VEX_WIG;
} }
@ -1728,12 +1728,12 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
let isCommutable = 1 in let isCommutable = 1 in
def rr : SIi8<0xC2, MRMSrcReg, def rr : SIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm, (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, timm:$cc))]>, [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))]>,
Sched<[sched]>; Sched<[sched]>;
def rm : SIi8<0xC2, MRMSrcMem, def rm : SIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1), [(set RC:$dst, (OpNode (VT RC:$src1),
(ld_frag addr:$src2), timm:$cc))]>, (ld_frag addr:$src2), imm:$cc))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
} }
@ -1766,13 +1766,13 @@ multiclass sse12_cmp_scalar_int<Operand memop,
def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst), def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src, u8imm:$cc), asm, (ins VR128:$src1, VR128:$src, u8imm:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1, [(set VR128:$dst, (Int VR128:$src1,
VR128:$src, timm:$cc))]>, VR128:$src, imm:$cc))]>,
Sched<[sched]>; Sched<[sched]>;
let mayLoad = 1 in let mayLoad = 1 in
def rm_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst), def rm_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, memop:$src, u8imm:$cc), asm, (ins VR128:$src1, memop:$src, u8imm:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1, [(set VR128:$dst, (Int VR128:$src1,
mem_cpat:$src, timm:$cc))]>, mem_cpat:$src, imm:$cc))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
} }
@ -1891,12 +1891,12 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
let isCommutable = 1 in let isCommutable = 1 in
def rri : PIi8<0xC2, MRMSrcReg, def rri : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm, (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
[(set RC:$dst, (VT (X86cmpp RC:$src1, RC:$src2, timm:$cc)))], d>, [(set RC:$dst, (VT (X86cmpp RC:$src1, RC:$src2, imm:$cc)))], d>,
Sched<[sched]>; Sched<[sched]>;
def rmi : PIi8<0xC2, MRMSrcMem, def rmi : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
[(set RC:$dst, [(set RC:$dst,
(VT (X86cmpp RC:$src1, (ld_frag addr:$src2), timm:$cc)))], d>, (VT (X86cmpp RC:$src1, (ld_frag addr:$src2), imm:$cc)))], d>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
} }
@ -1921,7 +1921,7 @@ let Constraints = "$src1 = $dst" in {
SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD; SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD;
} }
def CommutableCMPCC : PatLeaf<(timm), [{ def CommutableCMPCC : PatLeaf<(imm), [{
uint64_t Imm = N->getZExtValue() & 0x7; uint64_t Imm = N->getZExtValue() & 0x7;
return (Imm == 0x00 || Imm == 0x03 || Imm == 0x04 || Imm == 0x07); return (Imm == 0x00 || Imm == 0x03 || Imm == 0x04 || Imm == 0x07);
}]>; }]>;
@ -1985,13 +1985,13 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst), def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$src3), asm, (ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2), [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
(i8 timm:$src3))))], d>, (i8 imm:$src3))))], d>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCommutable = IsCommutable in let isCommutable = IsCommutable in
def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u8imm:$src3), asm, (ins RC:$src1, RC:$src2, u8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
(i8 timm:$src3))))], d>, (i8 imm:$src3))))], d>,
Sched<[sched]>; Sched<[sched]>;
} }
@ -2736,7 +2736,7 @@ defm : scalar_math_patterns<fadd, "ADDSD", X86Movsd, v2f64, f64, FR64, loadf64,
defm : scalar_math_patterns<fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>; defm : scalar_math_patterns<fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
defm : scalar_math_patterns<fmul, "MULSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>; defm : scalar_math_patterns<fmul, "MULSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
defm : scalar_math_patterns<fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>; defm : scalar_math_patterns<fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
/// Unop Arithmetic /// Unop Arithmetic
/// In addition, we also have a special variant of the scalar form here to /// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the /// represent the associated intrinsic operation. This form is unlike the
@ -3497,7 +3497,7 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
!if(Is2Addr, !if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 timm:$src2))))]>, [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 imm:$src2))))]>,
Sched<[schedImm]>; Sched<[schedImm]>;
} }
@ -3529,7 +3529,7 @@ multiclass PDI_binop_ri<bits<8> opc, Format ImmForm, string OpcodeStr,
!if(Is2Addr, !if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (VT (OpNode RC:$src1, (i8 timm:$src2))))]>, [(set RC:$dst, (VT (OpNode RC:$src1, (i8 imm:$src2))))]>,
Sched<[sched]>; Sched<[sched]>;
} }
@ -3612,7 +3612,7 @@ let Predicates = [HasAVX, prd] in {
!strconcat("v", OpcodeStr, !strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst, [(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))]>,
VEX, Sched<[sched.XMM]>, VEX_WIG; VEX, Sched<[sched.XMM]>, VEX_WIG;
def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2), (ins i128mem:$src1, u8imm:$src2),
@ -3620,7 +3620,7 @@ let Predicates = [HasAVX, prd] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst, [(set VR128:$dst,
(vt128 (OpNode (load addr:$src1), (vt128 (OpNode (load addr:$src1),
(i8 timm:$src2))))]>, VEX, (i8 imm:$src2))))]>, VEX,
Sched<[sched.XMM.Folded]>, VEX_WIG; Sched<[sched.XMM.Folded]>, VEX_WIG;
} }
@ -3630,7 +3630,7 @@ let Predicates = [HasAVX2, prd] in {
!strconcat("v", OpcodeStr, !strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, [(set VR256:$dst,
(vt256 (OpNode VR256:$src1, (i8 timm:$src2))))]>, (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))]>,
VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG; VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src1, u8imm:$src2), (ins i256mem:$src1, u8imm:$src2),
@ -3638,7 +3638,7 @@ let Predicates = [HasAVX2, prd] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, [(set VR256:$dst,
(vt256 (OpNode (load addr:$src1), (vt256 (OpNode (load addr:$src1),
(i8 timm:$src2))))]>, VEX, VEX_L, (i8 imm:$src2))))]>, VEX, VEX_L,
Sched<[sched.YMM.Folded]>, VEX_WIG; Sched<[sched.YMM.Folded]>, VEX_WIG;
} }
@ -3648,7 +3648,7 @@ let Predicates = [UseSSE2] in {
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst, [(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))]>,
Sched<[sched.XMM]>; Sched<[sched.XMM]>;
def mi : Ii8<0x70, MRMSrcMem, def mi : Ii8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2),
@ -3656,7 +3656,7 @@ let Predicates = [UseSSE2] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst, [(set VR128:$dst,
(vt128 (OpNode (memop addr:$src1), (vt128 (OpNode (memop addr:$src1),
(i8 timm:$src2))))]>, (i8 imm:$src2))))]>,
Sched<[sched.XMM.Folded]>; Sched<[sched.XMM.Folded]>;
} }
} }
@ -4827,7 +4827,7 @@ multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm, !strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst, (VT (X86PAlignr RC:$src1, RC:$src2, (i8 timm:$src3))))]>, [(set RC:$dst, (VT (X86PAlignr RC:$src1, RC:$src2, (i8 imm:$src3))))]>,
Sched<[sched]>; Sched<[sched]>;
let mayLoad = 1 in let mayLoad = 1 in
def rmi : SS3AI<0x0F, MRMSrcMem, (outs RC:$dst), def rmi : SS3AI<0x0F, MRMSrcMem, (outs RC:$dst),
@ -4838,7 +4838,7 @@ multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst, (VT (X86PAlignr RC:$src1, [(set RC:$dst, (VT (X86PAlignr RC:$src1,
(memop_frag addr:$src2), (memop_frag addr:$src2),
(i8 timm:$src3))))]>, (i8 imm:$src3))))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
} }
} }
@ -5315,7 +5315,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
!strconcat(asm, !strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst, [(set VR128:$dst,
(X86insertps VR128:$src1, VR128:$src2, timm:$src3))]>, (X86insertps VR128:$src1, VR128:$src2, imm:$src3))]>,
Sched<[SchedWriteFShuffle.XMM]>; Sched<[SchedWriteFShuffle.XMM]>;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f32mem:$src2, u8imm:$src3), (ins VR128:$src1, f32mem:$src2, u8imm:$src3),
@ -5326,7 +5326,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
[(set VR128:$dst, [(set VR128:$dst,
(X86insertps VR128:$src1, (X86insertps VR128:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))), (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
timm:$src3))]>, imm:$src3))]>,
Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>; Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
} }
@ -5352,7 +5352,7 @@ multiclass sse41_fp_unop_p<bits<8> opc, string OpcodeStr,
(outs RC:$dst), (ins RC:$src1, i32u8imm:$src2), (outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (VT (OpNode RC:$src1, timm:$src2)))]>, [(set RC:$dst, (VT (OpNode RC:$src1, imm:$src2)))]>,
Sched<[sched]>; Sched<[sched]>;
// Vector intrinsic operation, mem // Vector intrinsic operation, mem
@ -5361,7 +5361,7 @@ multiclass sse41_fp_unop_p<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, [(set RC:$dst,
(VT (OpNode (mem_frag addr:$src1), timm:$src2)))]>, (VT (OpNode (mem_frag addr:$src1),imm:$src2)))]>,
Sched<[sched.Folded]>; Sched<[sched.Folded]>;
} }
@ -5443,7 +5443,7 @@ let ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 in {
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst, (VT32 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>, [(set VR128:$dst, (VT32 (OpNode VR128:$src1, VR128:$src2, imm:$src3)))]>,
Sched<[sched]>; Sched<[sched]>;
def SSm_Int : SS4AIi8<opcss, MRMSrcMem, def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
@ -5454,7 +5454,7 @@ let ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 in {
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst, [(set VR128:$dst,
(OpNode VR128:$src1, sse_load_f32:$src2, timm:$src3))]>, (OpNode VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 } // ExeDomain = SSEPackedSingle, isCodeGenOnly = 1
@ -5466,7 +5466,7 @@ let ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in {
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst, (VT64 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>, [(set VR128:$dst, (VT64 (OpNode VR128:$src1, VR128:$src2, imm:$src3)))]>,
Sched<[sched]>; Sched<[sched]>;
def SDm_Int : SS4AIi8<opcsd, MRMSrcMem, def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
@ -5477,7 +5477,7 @@ let ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in {
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst, [(set VR128:$dst,
(OpNode VR128:$src1, sse_load_f64:$src2, timm:$src3))]>, (OpNode VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 } // ExeDomain = SSEPackedDouble, isCodeGenOnly = 1
} }
@ -5512,16 +5512,16 @@ let Predicates = [UseAVX] in {
} }
let Predicates = [UseAVX] in { let Predicates = [UseAVX] in {
def : Pat<(X86VRndScale FR32:$src1, timm:$src2), def : Pat<(X86VRndScale FR32:$src1, imm:$src2),
(VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, imm:$src2)>; (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, imm:$src2)>;
def : Pat<(X86VRndScale FR64:$src1, timm:$src2), def : Pat<(X86VRndScale FR64:$src1, imm:$src2),
(VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, imm:$src2)>; (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, imm:$src2)>;
} }
let Predicates = [UseAVX, OptForSize] in { let Predicates = [UseAVX, OptForSize] in {
def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2), def : Pat<(X86VRndScale (loadf32 addr:$src1), imm:$src2),
(VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>; (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>;
def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2), def : Pat<(X86VRndScale (loadf64 addr:$src1), imm:$src2),
(VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>; (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>;
} }
@ -5539,16 +5539,16 @@ defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl,
v4f32, v2f64, X86RndScales>; v4f32, v2f64, X86RndScales>;
let Predicates = [UseSSE41] in { let Predicates = [UseSSE41] in {
def : Pat<(X86VRndScale FR32:$src1, timm:$src2), def : Pat<(X86VRndScale FR32:$src1, imm:$src2),
(ROUNDSSr FR32:$src1, imm:$src2)>; (ROUNDSSr FR32:$src1, imm:$src2)>;
def : Pat<(X86VRndScale FR64:$src1, timm:$src2), def : Pat<(X86VRndScale FR64:$src1, imm:$src2),
(ROUNDSDr FR64:$src1, imm:$src2)>; (ROUNDSDr FR64:$src1, imm:$src2)>;
} }
let Predicates = [UseSSE41, OptForSize] in { let Predicates = [UseSSE41, OptForSize] in {
def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2), def : Pat<(X86VRndScale (loadf32 addr:$src1), imm:$src2),
(ROUNDSSm addr:$src1, imm:$src2)>; (ROUNDSSm addr:$src1, imm:$src2)>;
def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2), def : Pat<(X86VRndScale (loadf64 addr:$src1), imm:$src2),
(ROUNDSDm addr:$src1, imm:$src2)>; (ROUNDSDm addr:$src1, imm:$src2)>;
} }
@ -5830,7 +5830,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"), "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst, (IntId RC:$src1, RC:$src2, timm:$src3))]>, [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
Sched<[sched]>; Sched<[sched]>;
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst), def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$src3), (ins RC:$src1, x86memop:$src2, u8imm:$src3),
@ -5840,7 +5840,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst, [(set RC:$dst,
(IntId RC:$src1, (memop_frag addr:$src2), timm:$src3))]>, (IntId RC:$src1, (memop_frag addr:$src2), imm:$src3))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
} }
@ -5857,7 +5857,7 @@ multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"), "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>, [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>,
Sched<[sched]>; Sched<[sched]>;
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst), def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$src3), (ins RC:$src1, x86memop:$src2, u8imm:$src3),
@ -5867,7 +5867,7 @@ multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst, [(set RC:$dst,
(OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>, (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), imm:$src3)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
} }
@ -6012,7 +6012,7 @@ let ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in {
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"), "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>, [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>,
Sched<[sched]>; Sched<[sched]>;
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst), def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$src3), (ins RC:$src1, x86memop:$src2, u8imm:$src3),
@ -6022,12 +6022,12 @@ let ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in {
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst, [(set RC:$dst,
(OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>, (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), imm:$src3)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold]>;
} }
// Pattern to commute if load is in first source. // Pattern to commute if load is in first source.
def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, timm:$src3)), def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, imm:$src3)),
(!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2, (!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
(commuteXForm imm:$src3))>; (commuteXForm imm:$src3))>;
} }
@ -6065,36 +6065,36 @@ let Predicates = [HasAVX2] in {
// Emulate vXi32/vXi64 blends with vXf32/vXf64 or pblendw. // Emulate vXi32/vXi64 blends with vXf32/vXf64 or pblendw.
// ExecutionDomainFixPass will cleanup domains later on. // ExecutionDomainFixPass will cleanup domains later on.
let Predicates = [HasAVX1Only] in { let Predicates = [HasAVX1Only] in {
def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3), def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3),
(VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$src3)>; (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$src3)>;
def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3), def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3),
(VBLENDPDYrmi VR256:$src1, addr:$src2, imm:$src3)>; (VBLENDPDYrmi VR256:$src1, addr:$src2, imm:$src3)>;
def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3), def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3),
(VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 imm:$src3))>; (VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 imm:$src3))>;
// Use pblendw for 128-bit integer to keep it in the integer domain and prevent // Use pblendw for 128-bit integer to keep it in the integer domain and prevent
// it from becoming movsd via commuting under optsize. // it from becoming movsd via commuting under optsize.
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3), def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
(VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>; (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3), def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3),
(VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>; (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>;
def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3), def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3),
(VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>; (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>;
def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), timm:$src3), def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), imm:$src3),
(VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$src3)>; (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$src3)>;
def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), timm:$src3), def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), imm:$src3),
(VBLENDPSYrmi VR256:$src1, addr:$src2, imm:$src3)>; (VBLENDPSYrmi VR256:$src1, addr:$src2, imm:$src3)>;
def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, timm:$src3), def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, imm:$src3),
(VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 imm:$src3))>; (VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 imm:$src3))>;
// Use pblendw for 128-bit integer to keep it in the integer domain and prevent // Use pblendw for 128-bit integer to keep it in the integer domain and prevent
// it from becoming movss via commuting under optsize. // it from becoming movss via commuting under optsize.
def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3), def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
(VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>; (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), timm:$src3), def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), imm:$src3),
(VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>; (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, timm:$src3), def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, imm:$src3),
(VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>; (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;
} }
@ -6111,18 +6111,18 @@ defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16,
let Predicates = [UseSSE41] in { let Predicates = [UseSSE41] in {
// Use pblendw for 128-bit integer to keep it in the integer domain and prevent // Use pblendw for 128-bit integer to keep it in the integer domain and prevent
// it from becoming movss via commuting under optsize. // it from becoming movss via commuting under optsize.
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3), def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
(PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>; (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), timm:$src3), def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), imm:$src3),
(PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>; (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>;
def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, timm:$src3), def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, imm:$src3),
(PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>; (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>;
def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3), def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
(PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>; (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), timm:$src3), def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), imm:$src3),
(PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>; (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, timm:$src3), def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, imm:$src3),
(PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>; (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;
} }
@ -6596,7 +6596,7 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
"sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}", "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst, [(set VR128:$dst,
(int_x86_sha1rnds4 VR128:$src1, VR128:$src2, (int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
(i8 timm:$src3)))]>, TA, (i8 imm:$src3)))]>, TA,
Sched<[SchedWriteVecIMul.XMM]>; Sched<[SchedWriteVecIMul.XMM]>;
def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst), def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, u8imm:$src3), (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
@ -6604,7 +6604,7 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
[(set VR128:$dst, [(set VR128:$dst,
(int_x86_sha1rnds4 VR128:$src1, (int_x86_sha1rnds4 VR128:$src1,
(memop addr:$src2), (memop addr:$src2),
(i8 timm:$src3)))]>, TA, (i8 imm:$src3)))]>, TA,
Sched<[SchedWriteVecIMul.XMM.Folded, Sched<[SchedWriteVecIMul.XMM.Folded,
SchedWriteVecIMul.XMM.ReadAfterFold]>; SchedWriteVecIMul.XMM.ReadAfterFold]>;
@ -6722,26 +6722,26 @@ let Predicates = [HasAVX, HasAES] in {
(ins VR128:$src1, u8imm:$src2), (ins VR128:$src1, u8imm:$src2),
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>, (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
Sched<[WriteAESKeyGen]>, VEX, VEX_WIG; Sched<[WriteAESKeyGen]>, VEX, VEX_WIG;
def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2), (ins i128mem:$src1, u8imm:$src2),
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(int_x86_aesni_aeskeygenassist (load addr:$src1), timm:$src2))]>, (int_x86_aesni_aeskeygenassist (load addr:$src1), imm:$src2))]>,
Sched<[WriteAESKeyGen.Folded]>, VEX, VEX_WIG; Sched<[WriteAESKeyGen.Folded]>, VEX, VEX_WIG;
} }
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, u8imm:$src2), (ins VR128:$src1, u8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>, (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
Sched<[WriteAESKeyGen]>; Sched<[WriteAESKeyGen]>;
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2), (ins i128mem:$src1, u8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(int_x86_aesni_aeskeygenassist (memop addr:$src1), timm:$src2))]>, (int_x86_aesni_aeskeygenassist (memop addr:$src1), imm:$src2))]>,
Sched<[WriteAESKeyGen.Folded]>; Sched<[WriteAESKeyGen.Folded]>;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
@ -6762,7 +6762,7 @@ let Predicates = [NoAVX, HasPCLMUL] in {
(ins VR128:$src1, VR128:$src2, u8imm:$src3), (ins VR128:$src1, VR128:$src2, u8imm:$src3),
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst, [(set VR128:$dst,
(int_x86_pclmulqdq VR128:$src1, VR128:$src2, timm:$src3))]>, (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>,
Sched<[WriteCLMul]>; Sched<[WriteCLMul]>;
def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
@ -6770,12 +6770,12 @@ let Predicates = [NoAVX, HasPCLMUL] in {
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst, [(set VR128:$dst,
(int_x86_pclmulqdq VR128:$src1, (memop addr:$src2), (int_x86_pclmulqdq VR128:$src1, (memop addr:$src2),
timm:$src3))]>, imm:$src3))]>,
Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>; Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
} // Constraints = "$src1 = $dst" } // Constraints = "$src1 = $dst"
def : Pat<(int_x86_pclmulqdq (memop addr:$src2), VR128:$src1, def : Pat<(int_x86_pclmulqdq (memop addr:$src2), VR128:$src1,
(i8 timm:$src3)), (i8 imm:$src3)),
(PCLMULQDQrm VR128:$src1, addr:$src2, (PCLMULQDQrm VR128:$src1, addr:$src2,
(PCLMULCommuteImm imm:$src3))>; (PCLMULCommuteImm imm:$src3))>;
} // Predicates = [NoAVX, HasPCLMUL] } // Predicates = [NoAVX, HasPCLMUL]
@ -6799,19 +6799,19 @@ multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp,
(ins RC:$src1, RC:$src2, u8imm:$src3), (ins RC:$src1, RC:$src2, u8imm:$src3),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set RC:$dst, [(set RC:$dst,
(IntId RC:$src1, RC:$src2, timm:$src3))]>, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
Sched<[WriteCLMul]>; Sched<[WriteCLMul]>;
def rm : PCLMULIi8<0x44, MRMSrcMem, (outs RC:$dst), def rm : PCLMULIi8<0x44, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, MemOp:$src2, u8imm:$src3), (ins RC:$src1, MemOp:$src2, u8imm:$src3),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set RC:$dst, [(set RC:$dst,
(IntId RC:$src1, (LdFrag addr:$src2), timm:$src3))]>, (IntId RC:$src1, (LdFrag addr:$src2), imm:$src3))]>,
Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>; Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
// We can commute a load in the first operand by swapping the sources and // We can commute a load in the first operand by swapping the sources and
// rotating the immediate. // rotating the immediate.
def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 timm:$src3)), def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 imm:$src3)),
(!cast<Instruction>(NAME#"rm") RC:$src1, addr:$src2, (!cast<Instruction>(NAME#"rm") RC:$src1, addr:$src2,
(PCLMULCommuteImm imm:$src3))>; (PCLMULCommuteImm imm:$src3))>;
} }
@ -6857,8 +6857,8 @@ let Constraints = "$src = $dst" in {
def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst), def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst),
(ins VR128:$src, u8imm:$len, u8imm:$idx), (ins VR128:$src, u8imm:$len, u8imm:$idx),
"extrq\t{$idx, $len, $src|$src, $len, $idx}", "extrq\t{$idx, $len, $src|$src, $len, $idx}",
[(set VR128:$dst, (X86extrqi VR128:$src, timm:$len, [(set VR128:$dst, (X86extrqi VR128:$src, imm:$len,
timm:$idx))]>, imm:$idx))]>,
PD, Sched<[SchedWriteVecALU.XMM]>; PD, Sched<[SchedWriteVecALU.XMM]>;
def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst), def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src, VR128:$mask), (ins VR128:$src, VR128:$mask),
@ -6871,7 +6871,7 @@ def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx), (ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx),
"insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}", "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
[(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2, [(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2,
timm:$len, timm:$idx))]>, imm:$len, imm:$idx))]>,
XD, Sched<[SchedWriteVecALU.XMM]>; XD, Sched<[SchedWriteVecALU.XMM]>;
def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst), def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src, VR128:$mask), (ins VR128:$src, VR128:$mask),
@ -7142,13 +7142,13 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst), def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, u8imm:$src2), (ins RC:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (f_vt (X86VPermilpi RC:$src1, (i8 timm:$src2))))]>, VEX, [(set RC:$dst, (f_vt (X86VPermilpi RC:$src1, (i8 imm:$src2))))]>, VEX,
Sched<[sched]>; Sched<[sched]>;
def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst), def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
(ins x86memop_f:$src1, u8imm:$src2), (ins x86memop_f:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, [(set RC:$dst,
(f_vt (X86VPermilpi (load addr:$src1), (i8 timm:$src2))))]>, VEX, (f_vt (X86VPermilpi (load addr:$src1), (i8 imm:$src2))))]>, VEX,
Sched<[sched.Folded]>; Sched<[sched.Folded]>;
}// Predicates = [HasAVX, NoVLX] }// Predicates = [HasAVX, NoVLX]
} }
@ -7180,13 +7180,13 @@ def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, u8imm:$src3), (ins VR256:$src1, VR256:$src2, u8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, [(set VR256:$dst, (v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
(i8 timm:$src3))))]>, VEX_4V, VEX_L, (i8 imm:$src3))))]>, VEX_4V, VEX_L,
Sched<[WriteFShuffle256]>; Sched<[WriteFShuffle256]>;
def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst), def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, u8imm:$src3), (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4f64 addr:$src2), [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4f64 addr:$src2),
(i8 timm:$src3)))]>, VEX_4V, VEX_L, (i8 imm:$src3)))]>, VEX_4V, VEX_L,
Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>; Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
} }
@ -7198,19 +7198,19 @@ def Perm2XCommuteImm : SDNodeXForm<imm, [{
let Predicates = [HasAVX] in { let Predicates = [HasAVX] in {
// Pattern with load in other operand. // Pattern with load in other operand.
def : Pat<(v4f64 (X86VPerm2x128 (loadv4f64 addr:$src2), def : Pat<(v4f64 (X86VPerm2x128 (loadv4f64 addr:$src2),
VR256:$src1, (i8 timm:$imm))), VR256:$src1, (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>; (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>;
} }
let Predicates = [HasAVX1Only] in { let Predicates = [HasAVX1Only] in {
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))), def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
(loadv4i64 addr:$src2), (i8 timm:$imm))), (loadv4i64 addr:$src2), (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
// Pattern with load in other operand. // Pattern with load in other operand.
def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2), def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
VR256:$src1, (i8 timm:$imm))), VR256:$src1, (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>; (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>;
} }
@ -7256,7 +7256,7 @@ multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst), def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
(ins RC:$src1, i32u8imm:$src2), (ins RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (X86cvtps2ph RC:$src1, timm:$src2))]>, [(set VR128:$dst, (X86cvtps2ph RC:$src1, imm:$src2))]>,
TAPD, VEX, Sched<[RR]>; TAPD, VEX, Sched<[RR]>;
let hasSideEffects = 0, mayStore = 1 in let hasSideEffects = 0, mayStore = 1 in
def mr : Ii8<0x1D, MRMDestMem, (outs), def mr : Ii8<0x1D, MRMDestMem, (outs),
@ -7326,18 +7326,18 @@ multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins RC:$src1, RC:$src2, u8imm:$src3), (ins RC:$src1, RC:$src2, u8imm:$src3),
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>, [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>,
Sched<[sched]>, VEX_4V; Sched<[sched]>, VEX_4V;
def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst), def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$src3), (ins RC:$src1, x86memop:$src2, u8imm:$src3),
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, [(set RC:$dst,
(OpVT (OpNode RC:$src1, (load addr:$src2), timm:$src3)))]>, (OpVT (OpNode RC:$src1, (load addr:$src2), imm:$src3)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V; Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V;
// Pattern to commute if load is in first source. // Pattern to commute if load is in first source.
def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, timm:$src3)), def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, imm:$src3)),
(!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2, (!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
(commuteXForm imm:$src3))>; (commuteXForm imm:$src3))>;
} }
@ -7350,18 +7350,18 @@ defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32,
SchedWriteBlend.YMM, VR256, i256mem, SchedWriteBlend.YMM, VR256, i256mem,
BlendCommuteImm8>, VEX_L; BlendCommuteImm8>, VEX_L;
def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3), def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3),
(VPBLENDDYrri VR256:$src1, VR256:$src2, (BlendScaleImm4 imm:$src3))>; (VPBLENDDYrri VR256:$src1, VR256:$src2, (BlendScaleImm4 imm:$src3))>;
def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3), def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3),
(VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>; (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3), def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3),
(VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>; (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3), def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
(VPBLENDDrri VR128:$src1, VR128:$src2, (BlendScaleImm2to4 imm:$src3))>; (VPBLENDDrri VR128:$src1, VR128:$src2, (BlendScaleImm2to4 imm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3), def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3),
(VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleImm2to4 imm:$src3))>; (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleImm2to4 imm:$src3))>;
def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3), def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3),
(VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2to4 imm:$src3))>; (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2to4 imm:$src3))>;
} }
@ -7611,7 +7611,7 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, [(set VR256:$dst,
(OpVT (X86VPermi VR256:$src1, (i8 timm:$src2))))]>, (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>,
Sched<[Sched]>, VEX, VEX_L; Sched<[Sched]>, VEX, VEX_L;
def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst), def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
(ins memOp:$src1, u8imm:$src2), (ins memOp:$src1, u8imm:$src2),
@ -7619,7 +7619,7 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, [(set VR256:$dst,
(OpVT (X86VPermi (mem_frag addr:$src1), (OpVT (X86VPermi (mem_frag addr:$src1),
(i8 timm:$src2))))]>, (i8 imm:$src2))))]>,
Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VEX_L; Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VEX_L;
} }
} }
@ -7638,18 +7638,18 @@ def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, u8imm:$src3), (ins VR256:$src1, VR256:$src2, u8imm:$src3),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, [(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
(i8 timm:$src3))))]>, Sched<[WriteShuffle256]>, (i8 imm:$src3))))]>, Sched<[WriteShuffle256]>,
VEX_4V, VEX_L; VEX_4V, VEX_L;
def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst), def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, u8imm:$src3), (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2), [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2),
(i8 timm:$src3)))]>, (i8 imm:$src3)))]>,
Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L; Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
let Predicates = [HasAVX2] in let Predicates = [HasAVX2] in
def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2), def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
VR256:$src1, (i8 timm:$imm))), VR256:$src1, (i8 imm:$imm))),
(VPERM2I128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>; (VPERM2I128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>;
@ -7931,13 +7931,13 @@ multiclass GF2P8AFFINE_rmi<bits<8> Op, string OpStr, ValueType OpVT,
OpStr##"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}") in { OpStr##"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}") in {
def rri : Ii8<Op, MRMSrcReg, (outs RC:$dst), def rri : Ii8<Op, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u8imm:$src3), "", (ins RC:$src1, RC:$src2, u8imm:$src3), "",
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))], [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))],
SSEPackedInt>, Sched<[SchedWriteVecALU.XMM]>; SSEPackedInt>, Sched<[SchedWriteVecALU.XMM]>;
def rmi : Ii8<Op, MRMSrcMem, (outs RC:$dst), def rmi : Ii8<Op, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, X86MemOp:$src2, u8imm:$src3), "", (ins RC:$src1, X86MemOp:$src2, u8imm:$src3), "",
[(set RC:$dst, (OpVT (OpNode RC:$src1, [(set RC:$dst, (OpVT (OpNode RC:$src1,
(MemOpFrag addr:$src2), (MemOpFrag addr:$src2),
timm:$src3)))], SSEPackedInt>, imm:$src3)))], SSEPackedInt>,
Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>; Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>;
} }
} }
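Almost every pattern change in the hunks above is the same substitution: the intrinsic's immediate operand is matched with imm again rather than timm. In TableGen pattern input, imm matches a plain constant node, while timm only matches a target constant that is required to stay an immediate. A minimal sketch of the two forms, using made-up names (int_mytgt_foo, FOOri) purely for illustration:

// Hypothetical pattern pair; int_mytgt_foo and FOOri are illustrative names,
// not definitions from the tree.
// 'imm' (the form these hunks restore) accepts any constant operand.
def : Pat<(int_mytgt_foo VR128:$src, (i8 imm:$ctl)),
          (FOOri VR128:$src, imm:$ctl)>;
// 'timm' (the form being swapped out here) only accepts a target constant,
// i.e. an operand that is never materialized into a register.
def : Pat<(int_mytgt_foo VR128:$src, (i8 timm:$ctl)),
          (FOOri VR128:$src, timm:$ctl)>;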
@ -43,7 +43,7 @@ def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", [(int_x86_int (i8 3))]>;
let SchedRW = [WriteSystem] in { let SchedRW = [WriteSystem] in {
def INT : Ii8<0xcd, RawFrm, (outs), (ins u8imm:$trap), "int\t$trap", def INT : Ii8<0xcd, RawFrm, (outs), (ins u8imm:$trap), "int\t$trap",
[(int_x86_int timm:$trap)]>; [(int_x86_int imm:$trap)]>;
def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB; def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
@ -45,7 +45,7 @@ def XTEST : I<0x01, MRM_D6, (outs), (ins),
def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm), def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm),
"xabort\t$imm", "xabort\t$imm",
[(int_x86_xabort timm:$imm)]>, Requires<[HasRTM]>; [(int_x86_xabort imm:$imm)]>, Requires<[HasRTM]>;
} // SchedRW } // SchedRW
// HLE prefixes // HLE prefixes
@ -143,13 +143,13 @@ multiclass xop3opimm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins VR128:$src1, u8imm:$src2), (ins VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst, [(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), timm:$src2)))]>, (vt128 (OpNode (vt128 VR128:$src1), imm:$src2)))]>,
XOP, Sched<[sched]>; XOP, Sched<[sched]>;
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2), (ins i128mem:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst, [(set VR128:$dst,
(vt128 (OpNode (vt128 (load addr:$src1)), timm:$src2)))]>, (vt128 (OpNode (vt128 (load addr:$src1)), imm:$src2)))]>,
XOP, Sched<[sched.Folded, sched.ReadAfterFold]>; XOP, Sched<[sched.Folded, sched.ReadAfterFold]>;
} }
@ -251,7 +251,7 @@ multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set VR128:$dst, [(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2), (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
timm:$cc)))]>, imm:$cc)))]>,
XOP_4V, Sched<[sched]>; XOP_4V, Sched<[sched]>;
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, u8imm:$cc), (ins VR128:$src1, i128mem:$src2, u8imm:$cc),
@ -260,12 +260,12 @@ multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
[(set VR128:$dst, [(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 (OpNode (vt128 VR128:$src1),
(vt128 (load addr:$src2)), (vt128 (load addr:$src2)),
timm:$cc)))]>, imm:$cc)))]>,
XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
} }
def : Pat<(OpNode (load addr:$src2), def : Pat<(OpNode (load addr:$src2),
(vt128 VR128:$src1), timm:$cc), (vt128 VR128:$src1), imm:$cc),
(!cast<Instruction>(NAME#"mi") VR128:$src1, addr:$src2, (!cast<Instruction>(NAME#"mi") VR128:$src1, addr:$src2,
(CommuteVPCOMCC imm:$cc))>; (CommuteVPCOMCC imm:$cc))>;
} }
@ -422,7 +422,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst, [(set RC:$dst,
(VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 timm:$src4))))]>, (VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 imm:$src4))))]>,
Sched<[sched]>; Sched<[sched]>;
def rm : IXOP5<Opc, MRMSrcMemOp4, (outs RC:$dst), def rm : IXOP5<Opc, MRMSrcMemOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, intmemop:$src3, u4imm:$src4), (ins RC:$src1, RC:$src2, intmemop:$src3, u4imm:$src4),
@ -430,7 +430,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst, [(set RC:$dst,
(VT (X86vpermil2 RC:$src1, RC:$src2, (IntLdFrag addr:$src3), (VT (X86vpermil2 RC:$src1, RC:$src2, (IntLdFrag addr:$src3),
(i8 timm:$src4))))]>, VEX_W, (i8 imm:$src4))))]>, VEX_W,
Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst), def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, fpmemop:$src2, RC:$src3, u4imm:$src4), (ins RC:$src1, fpmemop:$src2, RC:$src3, u4imm:$src4),
@ -438,7 +438,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst, [(set RC:$dst,
(VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2), (VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2),
RC:$src3, (i8 timm:$src4))))]>, RC:$src3, (i8 imm:$src4))))]>,
Sched<[sched.Folded, sched.ReadAfterFold, Sched<[sched.Folded, sched.ReadAfterFold,
// fpmemop:$src2 // fpmemop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
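Several of the blocks above also keep a separate Pat for the case where the load is in the first source: the same instruction is selected with the operands swapped and the control immediate rewritten through an SDNodeXForm (BlendCommuteImm8, PCLMULCommuteImm, CommuteVPCOMCC, Perm2XCommuteImm above). A rough sketch of that idiom with invented names, assuming an 8-bit control immediate whose halves swap when the sources are commuted and a getI8Imm-style helper like the one the surrounding X86 transforms use:

// Hypothetical transform and pattern; MyCommuteImm, int_mytgt_foo and FOOrmi
// are illustrative names, not definitions from the tree.
def MyCommuteImm : SDNodeXForm<imm, [{
  // Swap the nibbles of the 8-bit control value to account for the
  // swapped sources.
  uint8_t Imm = N->getZExtValue();
  return getI8Imm((uint8_t)((Imm << 4) | (Imm >> 4)), SDLoc(N));
}]>;

def : Pat<(int_mytgt_foo (load addr:$src2), VR128:$src1, (i8 imm:$ctl)),
          (FOOrmi VR128:$src1, addr:$src2, (MyCommuteImm imm:$ctl))>;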
@ -389,7 +389,8 @@ define void @store(i64* %addr, i64 addrspace(42)* %addr42, i64 %val1, i64 %val2)
; CHECK-LABEL: name: intrinsics ; CHECK-LABEL: name: intrinsics
; CHECK: [[CUR:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[CUR:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: [[BITS:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[BITS:%[0-9]+]]:_(s32) = COPY $w1
; CHECK: [[PTR:%[0-9]+]]:_(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 ; CHECK: [[CREG:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[PTR:%[0-9]+]]:_(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), [[CREG]]
; CHECK: [[PTR_VEC:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.ptr.vec ; CHECK: [[PTR_VEC:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.ptr.vec
; CHECK: [[VEC:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_VEC]] ; CHECK: [[VEC:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_VEC]]
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), [[VEC]](<8 x s8>), [[VEC]](<8 x s8>), [[PTR]](p0) ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), [[VEC]](<8 x s8>), [[VEC]](<8 x s8>), [[PTR]](p0)
@ -10,20 +10,24 @@ body: |
bb.0: bb.0:
liveins: $vgpr0 liveins: $vgpr0
%0:vgpr(s32) = COPY $vgpr0 %0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = G_CONSTANT i32 1
%2:sgpr(s32) = G_CONSTANT i32 15
%3:sgpr(s1) = G_CONSTANT i1 0
%4:sgpr(s1) = G_CONSTANT i1 1
; CHECK: EXP 1, %0, %0, %0, %0, 0, 0, 15, implicit $exec ; CHECK: EXP 1, %0, %0, %0, %0, 0, 0, 15, implicit $exec
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp),1, 15, %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), 0, 0 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %1:sgpr(s32), %2:sgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %3:sgpr(s1), %3:sgpr(s1)
; CHECK: EXP_DONE 1, %0, %0, %0, %0, 0, 0, 15, implicit $exec ; CHECK: EXP_DONE 1, %0, %0, %0, %0, 0, 0, 15, implicit $exec
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 1, 15, %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), 1, 0 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %1:sgpr(s32), %2:sgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %4:sgpr(s1), %3:sgpr(s1)
%5:vgpr(<2 x s16>) = G_BITCAST %0(s32) %5:vgpr(<2 x s16>) = G_BITCAST %0(s32)
; CHECK: [[UNDEF0:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK: [[UNDEF0:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK: EXP 1, %0, %0, [[UNDEF0]], [[UNDEF0]], 0, 1, 15, implicit $exec ; CHECK: EXP 1, %0, %0, [[UNDEF0]], [[UNDEF0]], 0, 1, 15, implicit $exec
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 1, 15, %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), 0, 0 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), %1:sgpr(s32), %2:sgpr(s32), %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), %3:sgpr(s1), %3:sgpr(s1)
; CHECK: [[UNDEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK: [[UNDEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK: EXP_DONE 1, %0, %0, [[UNDEF1]], [[UNDEF1]], 0, 1, 15, implicit $exec ; CHECK: EXP_DONE 1, %0, %0, [[UNDEF1]], [[UNDEF1]], 0, 1, 15, implicit $exec
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 1, 15, %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), 1, 0 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), %1:sgpr(s32), %2:sgpr(s32), %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), %4:sgpr(s1), %3:sgpr(s1)
@ -18,7 +18,8 @@ body: |
; GCN: S_SENDMSG 1, implicit $exec, implicit $m0 ; GCN: S_SENDMSG 1, implicit $exec, implicit $m0
; GCN: S_ENDPGM 0 ; GCN: S_ENDPGM 0
%0:sgpr(s32) = COPY $sgpr0 %0:sgpr(s32) = COPY $sgpr0
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 1, %0(s32) %2:sgpr(s32) = G_CONSTANT i32 1
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), %2(s32), %0(s32)
S_ENDPGM 0 S_ENDPGM 0
... ...
@ -1,15 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -march=amdgcn -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - | FileCheck %s
declare void @llvm.amdgcn.s.sendmsg(i32 immarg, i32)
define amdgpu_ps void @test_sendmsg(i32 inreg %m0) {
; CHECK-LABEL: name: test_sendmsg
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr0
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 12, [[COPY]](s32)
; CHECK: S_ENDPGM
call void @llvm.amdgcn.s.sendmsg(i32 12, i32 %m0)
ret void
}
@ -9,7 +9,10 @@ define amdgpu_ps void @disabled_input(float inreg %arg0, float %psinput0, float
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
; CHECK: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), [[C]](s32), [[C1]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), [[C2]](s1), [[C2]](s1)
; CHECK: S_ENDPGM 0 ; CHECK: S_ENDPGM 0
main_body: main_body:
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0
@ -24,14 +27,17 @@ define amdgpu_ps void @disabled_input_struct(float inreg %arg0, { float, float }
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
; CHECK: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), [[C]](s32), [[C1]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), [[C2]](s1), [[C2]](s1)
; CHECK: S_ENDPGM 0 ; CHECK: S_ENDPGM 0
main_body: main_body:
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0
ret void ret void
} }
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #0 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
attributes #0 = { nounwind } attributes #0 = { nounwind }
attributes #1 = { "InitialPSInputAddr"="0x00002" } attributes #1 = { "InitialPSInputAddr"="0x00002" }
@ -1,8 +1,9 @@
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=irtranslator -global-isel %s -o - | FileCheck %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=irtranslator -global-isel %s -o - | FileCheck %s
; CHECK-LABEL: name: test_f32_inreg ; CHECK-LABEL: name: test_f32_inreg
; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr2 ; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr2
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[S0]] ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S0]]
define amdgpu_vs void @test_f32_inreg(float inreg %arg0) { define amdgpu_vs void @test_f32_inreg(float inreg %arg0) {
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
ret void ret void
@ -10,7 +11,7 @@ define amdgpu_vs void @test_f32_inreg(float inreg %arg0) {
; CHECK-LABEL: name: test_f32 ; CHECK-LABEL: name: test_f32
; CHECK: [[V0:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[V0:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[V0]] ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[V0]]
define amdgpu_vs void @test_f32(float %arg0) { define amdgpu_vs void @test_f32(float %arg0) {
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
ret void ret void
@ -32,7 +33,7 @@ define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(4)* inreg %arg0) {
; CHECK: [[S4:%[0-9]+]]:_(s32) = COPY $sgpr4 ; CHECK: [[S4:%[0-9]+]]:_(s32) = COPY $sgpr4
; CHECK: [[S34:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[S3]](s32), [[S4]](s32) ; CHECK: [[S34:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[S3]](s32), [[S4]](s32)
; CHECK: G_LOAD [[S34]] ; CHECK: G_LOAD [[S34]]
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[S2]](s32) ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S2]]
define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)* inreg %arg1) { define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)* inreg %arg1) {
%tmp0 = load volatile i32, i32 addrspace(4)* %arg1 %tmp0 = load volatile i32, i32 addrspace(4)* %arg1
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
@ -44,7 +45,7 @@ define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)*
; CHECK: [[S1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; CHECK: [[S1:%[0-9]+]]:_(s32) = COPY $sgpr3
; CHECK: [[V0:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[V0:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[V1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[V1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[V0]](s32), [[S0]](s32), [[V1]](s32), [[S1]](s32) ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[V0]](s32), [[S0]](s32), [[V1]](s32), [[S1]](s32)
define amdgpu_vs void @test_order(float inreg %arg0, float inreg %arg1, float %arg2, float %arg3) { define amdgpu_vs void @test_order(float inreg %arg0, float inreg %arg1, float %arg2, float %arg3) {
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg2, float %arg0, float %arg3, float %arg1, i1 false, i1 false) #0 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg2, float %arg0, float %arg3, float %arg1, i1 false, i1 false) #0
ret void ret void
@ -9,13 +9,14 @@ define amdgpu_ps void @test_div_scale(float %arg0, float %arg1) {
; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK: liveins: $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), -1 ; CHECK: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), [[C]](s1)
; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[INT1]](s1) ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[INT1]](s1)
; CHECK: G_STORE [[INT]](s32), [[DEF]](p1) :: (store 4 into `float addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[INT]](s32), [[DEF]](p1) :: (store 4 into `float addrspace(1)* undef`, addrspace 1)
; CHECK: G_STORE [[SEXT]](s32), [[DEF1]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[SEXT]](s32), [[DEF1]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0 ; CHECK: S_ENDPGM
%call = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %arg0, float %arg1, i1 true) %call = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %arg0, float %arg1, i1 true)
%extract0 = extractvalue { float, i1 } %call, 0 %extract0 = extractvalue { float, i1 } %call, 0
%extract1 = extractvalue { float, i1 } %call, 1 %extract1 = extractvalue { float, i1 } %call, 1
@ -1,519 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -o - %s | FileCheck -check-prefix=UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=PACKED %s
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16(<4 x i32> inreg %rsrc, half %val, i32 %voffset, i32 inreg %soffset) {
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16(<4 x i32> inreg %rsrc, half %val, i32 inreg %soffset) {
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7 + 4095, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7 + 4095, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16(<4 x i32> inreg %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) {
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
; Make sure unpack code is emitted outside of loop
define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16(<4 x i32> %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) {
; UNPACKED-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: successors: %bb.2(0x80000000)
; UNPACKED: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
; UNPACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3
; UNPACKED: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; UNPACKED: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
; UNPACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; UNPACKED: bb.2:
; UNPACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; UNPACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec
; UNPACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec
; UNPACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; UNPACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec
; UNPACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec
; UNPACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec
; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; UNPACKED: bb.3:
; UNPACKED: successors: %bb.4(0x80000000)
; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; UNPACKED: bb.4:
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
; PACKED: bb.1 (%ir-block.0):
; PACKED: successors: %bb.2(0x80000000)
; PACKED: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; PACKED: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
; PACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; PACKED: bb.2:
; PACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; PACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
; PACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
; PACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; PACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; PACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec
; PACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec
; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; PACKED: bb.3:
; PACKED: successors: %bb.4(0x80000000)
; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; PACKED: bb.4:
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: %11:vgpr_32, dead %21:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; PACKED: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
%voffset.add = add i32 %voffset, 16
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: %11:vgpr_32, dead %22:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; PACKED: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: %11:vgpr_32, dead %22:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; PACKED: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
; Check what happens with offset add inside a waterfall loop
define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16_add_4096(<4 x i32> %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) {
; UNPACKED-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16_add_4096
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: successors: %bb.2(0x80000000)
; UNPACKED: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
; UNPACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: %13:vgpr_32, dead %47:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY4]], implicit $exec
; UNPACKED: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY10]], [[COPY5]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3
; UNPACKED: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; UNPACKED: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
; UNPACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; UNPACKED: bb.2:
; UNPACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; UNPACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec
; UNPACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec
; UNPACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; UNPACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY11]], implicit $exec
; UNPACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec
; UNPACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec
; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; UNPACKED: bb.3:
; UNPACKED: successors: %bb.4(0x80000000)
; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; UNPACKED: bb.4:
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16_add_4096
; PACKED: bb.1 (%ir-block.0):
; PACKED: successors: %bb.2(0x80000000)
; PACKED: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
; PACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; PACKED: %13:vgpr_32, dead %31:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
; PACKED: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; PACKED: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
; PACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; PACKED: bb.2:
; PACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; PACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec
; PACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec
; PACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; PACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec
; PACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec
; PACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec
; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY10]], implicit $exec
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; PACKED: bb.3:
; PACKED: successors: %bb.4(0x80000000)
; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; PACKED: bb.4:
; PACKED: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
declare void @llvm.amdgcn.raw.buffer.store.format.f16(half, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half>, <4 x i32>, i32, i32, i32 immarg)

@ -1,314 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -o - %s | FileCheck %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck %s
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f32(<4 x i32> inreg %rsrc, float %val, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f32
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32(<4 x i32> inreg %rsrc, <3 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2
; CHECK: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32(<4 x i32> inreg %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32(<4 x i32> %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
; CHECK: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; CHECK: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4095(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4095
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4096(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4096
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_16(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_16
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %13:vgpr_32, dead %15:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 16
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4095(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4095
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %13:vgpr_32, dead %15:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4096(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4096
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %13:vgpr_32, dead %15:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
; Check what happens with offset add inside a waterfall loop
define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32_add_4096(<4 x i32> %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32_add_4096
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
; CHECK: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %15:vgpr_32, dead %33:sreg_64_xexec = V_ADD_I32_e64 [[COPY8]], [[COPY10]], 0, implicit $exec
; CHECK: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; CHECK: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY11]], implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
declare void @llvm.amdgcn.raw.buffer.store.format.f32(float, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float>, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float>, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg)

@ -1,791 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
; FIXME: Test with SI when argument lowering not broken for f16
; Natural mapping
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
; Copies for VGPR arguments
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, float inreg %val, i32 inreg %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr6
; CHECK: [[COPY5:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr7
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr8
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
; Waterfall for rsrc
define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
; Waterfall for soffset
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
; Waterfall for rsrc and soffset
define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, float %val, i32 %voffset, i32 %soffset) {
; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 3)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_dlc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_dlc
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc_dlc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc_dlc
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 1, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_dlc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_dlc
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 1, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc_dlc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc_dlc
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 1, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32(<4 x i32> inreg %rsrc, <3 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2
; CHECK: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32(<4 x i32> inreg %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i8(<4 x i32> inreg %rsrc, i32 %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i8
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom TargetCustom7, addrspace 4)
; CHECK: S_ENDPGM 0
%val.trunc = trunc i32 %val to i8
call void @llvm.amdgcn.raw.buffer.store.i8(i8 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i16(<4 x i32> inreg %rsrc, i32 %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i16
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%val.trunc = trunc i32 %val to i16
call void @llvm.amdgcn.raw.buffer.store.i16(i16 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16(<4 x i32> inreg %rsrc, half %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16(<4 x i32> inreg %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16(<4 x i32> %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; CHECK: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4095(<4 x i32> inreg %rsrc, float %val, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4095
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4096(<4 x i32> inreg %rsrc, float %val, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4096
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4096, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_16(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_16
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 16
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4095(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4095
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4096(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4096
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_16(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_16
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 16
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4095(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4095
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4096(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4096
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
; An add of the offset is necessary, with a waterfall loop. Make sure the add is done outside of the waterfall loop.
define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_offset_add_5000(<4 x i32> %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_offset_add_5000
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 5000
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %11:vgpr_32, dead %29:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; CHECK: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 5000
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
; An add of the offset is necessary, with a waterfall loop. Make sure the add is done outside of the waterfall loop.
define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr_soffset_offset(<4 x i32> %rsrc, float %val, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr_soffset_offset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 5000, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 5000, i32 %soffset, i32 0)
ret void
}
declare void @llvm.amdgcn.raw.buffer.store.i8(i8, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.i16(i16, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.f16(half, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg)


@ -1,45 +0,0 @@
; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare void @llvm.amdgcn.s.sleep(i32) #0
; GCN-LABEL: {{^}}test_s_sleep:
; GCN: s_sleep 0{{$}}
; GCN: s_sleep 1{{$}}
; GCN: s_sleep 2{{$}}
; GCN: s_sleep 3{{$}}
; GCN: s_sleep 4{{$}}
; GCN: s_sleep 5{{$}}
; GCN: s_sleep 6{{$}}
; GCN: s_sleep 7{{$}}
; GCN: s_sleep 8{{$}}
; GCN: s_sleep 9{{$}}
; GCN: s_sleep 10{{$}}
; GCN: s_sleep 11{{$}}
; GCN: s_sleep 12{{$}}
; GCN: s_sleep 13{{$}}
; GCN: s_sleep 14{{$}}
; GCN: s_sleep 15{{$}}
define amdgpu_kernel void @test_s_sleep(i32 %x) #0 {
call void @llvm.amdgcn.s.sleep(i32 0)
call void @llvm.amdgcn.s.sleep(i32 1)
call void @llvm.amdgcn.s.sleep(i32 2)
call void @llvm.amdgcn.s.sleep(i32 3)
call void @llvm.amdgcn.s.sleep(i32 4)
call void @llvm.amdgcn.s.sleep(i32 5)
call void @llvm.amdgcn.s.sleep(i32 6)
call void @llvm.amdgcn.s.sleep(i32 7)
; Values that might only work on VI
call void @llvm.amdgcn.s.sleep(i32 8)
call void @llvm.amdgcn.s.sleep(i32 9)
call void @llvm.amdgcn.s.sleep(i32 10)
call void @llvm.amdgcn.s.sleep(i32 11)
call void @llvm.amdgcn.s.sleep(i32 12)
call void @llvm.amdgcn.s.sleep(i32 13)
call void @llvm.amdgcn.s.sleep(i32 14)
call void @llvm.amdgcn.s.sleep(i32 15)
ret void
}
attributes #0 = { nounwind }


@ -23,20 +23,28 @@ body: |
   bb.0:
     liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
     ; CHECK-LABEL: name: exp_s
+    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
     ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
     ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
     ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+    ; CHECK: [[C2:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 false
+    ; CHECK: [[C3:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 false
     ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
     ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
     ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
     ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32)
-    ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 0, [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0, 0
-    %0:_(s32) = COPY $sgpr0
-    %1:_(s32) = COPY $sgpr1
-    %2:_(s32) = COPY $sgpr2
-    %3:_(s32) = COPY $sgpr3
-    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), 0, 0, %0, %1, %2, %3, 0, 0
+    ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), [[C]](s32), [[C1]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[C2]](s1), [[C3]](s1)
+    %0:_(s32) = G_CONSTANT i32 0
+    %1:_(s32) = G_CONSTANT i32 0
+    %2:_(s32) = COPY $sgpr0
+    %3:_(s32) = COPY $sgpr1
+    %4:_(s32) = COPY $sgpr2
+    %5:_(s32) = COPY $sgpr3
+    %6:_(s1) = G_CONSTANT i1 0
+    %7:_(s1) = G_CONSTANT i1 0
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), %0, %1, %2, %3, %4, %5, %6, %7
 ...
 ---
 name: exp_v
@ -46,14 +54,22 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
     ; CHECK-LABEL: name: exp_v
+    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
     ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
     ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
-    ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 0, [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), 0, 0
-    %0:_(s32) = COPY $vgpr0
-    %1:_(s32) = COPY $vgpr1
-    %2:_(s32) = COPY $vgpr2
-    %3:_(s32) = COPY $vgpr3
-    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), 0, 0, %0, %1, %2, %3, 0, 0
+    ; CHECK: [[C2:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 false
+    ; CHECK: [[C3:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 false
+    ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), [[C]](s32), [[C1]](s32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[C2]](s1), [[C3]](s1)
+    %0:_(s32) = G_CONSTANT i32 0
+    %1:_(s32) = G_CONSTANT i32 0
+    %2:_(s32) = COPY $vgpr0
+    %3:_(s32) = COPY $vgpr1
+    %4:_(s32) = COPY $vgpr2
+    %5:_(s32) = COPY $vgpr3
+    %6:_(s1) = G_CONSTANT i1 0
+    %7:_(s1) = G_CONSTANT i1 0
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), %0, %1, %2, %3, %4, %5, %6, %7
 ...


@ -1,21 +0,0 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s
---
name: ds_swizzle_s
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0
; CHECK-LABEL: name: ds_swizzle_s
; CHECK: liveins: $sgpr0
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), [[COPY1]](s32), 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), %0, 0
...


@ -1,181 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
; Natural mapping
define amdgpu_ps void @load_1d_vgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 %s) {
; CHECK-LABEL: name: load_1d_vgpr_vaddr__sgpr_srsrc
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
; CHECK: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY9]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
}
; Copy needed for VGPR argument
define amdgpu_ps void @load_1d_sgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 inreg %s) {
; CHECK-LABEL: name: load_1d_sgpr_vaddr__sgpr_srsrc
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
; CHECK: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY10]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
}
; Waterfall loop needed for rsrc
define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) {
; CHECK-LABEL: name: load_1d_vgpr_vaddr__vgpr_srsrc
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %19, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %12(<4 x s32>), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
; CHECK: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
; CHECK: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY9]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
}
; Waterfall loop needed for rsrc, copy needed for vaddr
define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg %s) {
; CHECK-LABEL: name: load_1d_sgpr_vaddr__vgpr_srsrc
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32)
; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %20, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %12(<4 x s32>), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
; CHECK: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
; CHECK: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY10]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
}
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
attributes #0 = { nounwind readonly }

@ -1,268 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
; Natural mapping
define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; CHECK-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
; CHECK: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11
; CHECK: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12
; CHECK: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13
; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
; CHECK: [[COPY13:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY13]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
}
; Copy required for VGPR input
define amdgpu_ps void @sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float inreg %s) {
; CHECK-LABEL: name: sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
; CHECK: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11
; CHECK: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12
; CHECK: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13
; CHECK: [[COPY12:%[0-9]+]]:sgpr(s32) = COPY $sgpr14
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; CHECK: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
; CHECK: [[COPY14:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY14]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
}
; Waterfall loop for rsrc
define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsrc, <4 x i32> inreg %samp, float %s) {
; CHECK-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %24, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
; CHECK: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
; CHECK: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: [[COPY13:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY13]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
}
; Waterfall loop for sampler
define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inreg %rsrc, <4 x i32> %samp, float %s) {
; CHECK-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %24, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: [[COPY13:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY13]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
}
; Waterfall loop for rsrc and sampler
define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsrc, <4 x i32> %samp, float %s) {
; CHECK-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr9
; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr10
; CHECK: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr11
; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
; CHECK: [[UV4:%[0-9]+]]:vreg_64(s64), [[UV5:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %24, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
; CHECK: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
; CHECK: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
; CHECK: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub1(s64), implicit $exec
; CHECK: [[MV4:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_4:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV4]](s64), [[UV4]](s64), implicit $exec
; CHECK: [[S_AND_B64_3:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_4]], [[S_AND_B64_2]], implicit-def $scc
; CHECK: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub1(s64), implicit $exec
; CHECK: [[MV5:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV5]](s64), [[UV5]](s64), implicit $exec
; CHECK: [[S_AND_B64_4:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_5]], [[S_AND_B64_3]], implicit-def $scc
; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_4]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: [[COPY13:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY13]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
}
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
attributes #0 = { nounwind readonly }

@ -1,173 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
; Natural mapping
define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[INT]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret float %val
}
; Copies for VGPR arguments
define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 inreg %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_load__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[INT]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret float %val
}
; Waterfall for rsrc
define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %15, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %9(s32), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: $vgpr0 = COPY [[INT]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret float %val
}
; Waterfall for soffset
define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 %soffset) {
; CHECK-LABEL: name: raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %15, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %9(s32), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY5]](s32), implicit $exec
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: $vgpr0 = COPY [[INT]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret float %val
}
; Waterfall for rsrc and soffset
define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) {
; CHECK-LABEL: name: raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr5
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %15, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %9(s32), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]](s32), implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: $vgpr0 = COPY [[INT]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret float %val
}
declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32 immarg)

@ -1,179 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
; Natural mapping
define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[INT]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret float %val
}
; Copies for VGPR arguments
define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__sgpr_val__sgpr_vindex__sgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 inreg %vindex, i32 inreg %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: struct_buffer_load__sgpr_rsrc__sgpr_val__sgpr_vindex__sgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32)
; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY8]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[INT]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret float %val
}
; Waterfall for rsrc
define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %16, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: $vgpr0 = COPY [[INT]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret float %val
}
; Waterfall for soffset
define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex_vgpr_voffset__vgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 %soffset) {
; CHECK-LABEL: name: struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex_vgpr_voffset__vgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %16, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY6]](s32), implicit $exec
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: $vgpr0 = COPY [[INT]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret float %val
}
; Waterfall for rsrc and soffset
define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset) {
; CHECK-LABEL: name: struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr6
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %16, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]](s32), implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: $vgpr0 = COPY [[INT]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret float %val
}
declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32 immarg)

@ -1,174 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
; Natural mapping
define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, float %val, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
}
; Copies for VGPR arguments
define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__sgpr_val__sgpr_vindex__sgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, float inreg %val, i32 inreg %vindex, i32 inreg %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: struct_buffer_store__sgpr_rsrc__sgpr_val__sgpr_vindex__sgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32)
; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32)
; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[COPY6]](s32)
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY8]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY9]](s32), [[COPY10]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
}
; Waterfall for rsrc
define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, float %val, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.1, %14, %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
}
; Waterfall for soffset
define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset(<4 x i32> inreg %rsrc, float %val, i32 %vindex, i32 %voffset, i32 %soffset) {
; CHECK-LABEL: name: struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr3
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.1, %14, %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY7]](s32), implicit $exec
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
}
; Waterfall for rsrc and soffset
define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, float %val, i32 %vindex, i32 %voffset, i32 %soffset) {
; CHECK-LABEL: name: struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr7
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.1, %14, %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]](s32), implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
}
declare void @llvm.amdgcn.struct.buffer.store.f32(float, <4 x i32>, i32, i32, i32, i32 immarg)
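The V_READFIRSTLANE_B32 / V_CMP_EQ / S_AND_SAVEEXEC_B64 / S_CBRANCH_EXECNZ sequences checked above are the waterfall loop that regbankselect emits when an operand the instruction needs to be uniform (the rsrc descriptor or soffset here) only exists in VGPRs: read the first active lane's value, execute once for every lane holding that same value, retire those lanes from exec, and branch back while any lanes remain. In the rsrc-plus-soffset case the two compare results are simply AND-ed into one mask, so a single loop covers all divergent operands. The stand-alone C++ sketch below only illustrates that control flow on a toy wave; it is not the AMDGPU lowering code, and Lane, runWaterfall, and the 64-lane exec handling are invented for the example (__builtin_ctzll assumes GCC/Clang).

// Toy, host-side illustration of the waterfall-loop shape seen in the CHECK
// lines above: execute an operation that needs a uniform (scalar) operand for
// a wave whose lanes may each carry a different value. Not LLVM code; every
// name here is invented.
#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

struct Lane {
  uint32_t soffset; // per-lane value that the real instruction wants in an SGPR
};

// Repeatedly: take the first active lane's value ("v_readfirstlane"), build
// the mask of lanes holding the same value ("v_cmp" + "s_and_saveexec"), run
// the operation once for that uniform value, then clear those lanes from the
// exec mask and loop while any remain ("s_cbranch_execnz").
void runWaterfall(const std::vector<Lane> &Wave,
                  const std::function<void(uint32_t, uint64_t)> &Op) {
  uint64_t Exec = Wave.size() >= 64 ? ~0ULL : ((1ULL << Wave.size()) - 1);
  while (Exec) {
    unsigned First = __builtin_ctzll(Exec);   // index of first active lane
    uint32_t Uniform = Wave[First].soffset;   // "readfirstlane"
    uint64_t Match = 0;
    for (size_t L = 0; L < Wave.size(); ++L)
      if (((Exec >> L) & 1) != 0 && Wave[L].soffset == Uniform)
        Match |= 1ULL << L;                   // lanes sharing the value
    Op(Uniform, Match);                       // one uniform execution
    Exec &= ~Match;                           // retire those lanes and loop
  }
}

int main() {
  std::vector<Lane> Wave = {{4}, {4}, {8}, {4}, {8}};
  runWaterfall(Wave, [](uint32_t Val, uint64_t Mask) {
    std::cout << "soffset " << Val << " handled for lane mask 0x" << std::hex
              << Mask << std::dec << "\n";
  });
  return 0;
}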
@@ -1,9 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
-# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
-# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
 ---
 name: smulh_s32_ss
@@ -12,16 +8,10 @@ legalized: true
 body: |
   bb.0:
     liveins: $sgpr0, $sgpr1
-    ; GFX6-LABEL: name: smulh_s32_ss
-    ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
-    ; GFX6: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
-    ; GFX6: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY2]]
-    ; GFX9-LABEL: name: smulh_s32_ss
-    ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
-    ; GFX9: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
+    ; CHECK-LABEL: name: smulh_s32_ss
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; CHECK: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_SMULH %0, %1
@@ -34,15 +24,10 @@ legalized: true
 body: |
   bb.0:
     liveins: $sgpr0, $vgpr0
-    ; GFX6-LABEL: name: smulh_s32_sv
-    ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; GFX6: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
-    ; GFX9-LABEL: name: smulh_s32_sv
-    ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; GFX9: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
+    ; CHECK-LABEL: name: smulh_s32_sv
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $vgpr0
     %2:_(s32) = G_SMULH %0, %1
@@ -55,17 +40,11 @@ legalized: true
 body: |
   bb.0:
     liveins: $sgpr0, $vgpr0
-    ; GFX6-LABEL: name: smulh_s32_vs
-    ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; GFX6: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
-    ; GFX6: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY2]]
-    ; GFX9-LABEL: name: smulh_s32_vs
-    ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
-    ; GFX9: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY2]]
+    ; CHECK-LABEL: name: smulh_s32_vs
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY2]]
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $sgpr0
     %2:_(s32) = G_SMULH %0, %1
@@ -78,15 +57,10 @@ legalized: true
 body: |
   bb.0:
     liveins: $vgpr0, $vgpr1
-    ; GFX6-LABEL: name: smulh_s32_vv
-    ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
-    ; GFX6: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
-    ; GFX9-LABEL: name: smulh_s32_vv
-    ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
-    ; GFX9: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
+    ; CHECK-LABEL: name: smulh_s32_vv
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; CHECK: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %2:_(s32) = G_SMULH %0, %1
@@ -1,9 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
-# XUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
-# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
-# XUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
 ---
 name: umulh_s32_ss
@@ -12,16 +8,10 @@ legalized: true
 body: |
   bb.0:
     liveins: $sgpr0, $sgpr1
-    ; GFX6-LABEL: name: umulh_s32_ss
-    ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
-    ; GFX6: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
-    ; GFX6: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY2]]
-    ; GFX9-LABEL: name: umulh_s32_ss
-    ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
-    ; GFX9: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
+    ; CHECK-LABEL: name: umulh_s32_ss
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; CHECK: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_UMULH %0, %1
@@ -34,15 +24,10 @@ legalized: true
 body: |
   bb.0:
     liveins: $sgpr0, $vgpr0
-    ; GFX6-LABEL: name: umulh_s32_sv
-    ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; GFX6: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
-    ; GFX9-LABEL: name: umulh_s32_sv
-    ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; GFX9: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
+    ; CHECK-LABEL: name: umulh_s32_sv
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $vgpr0
     %2:_(s32) = G_UMULH %0, %1
@@ -55,17 +40,11 @@ legalized: true
 body: |
   bb.0:
     liveins: $sgpr0, $vgpr0
-    ; GFX6-LABEL: name: umulh_s32_vs
-    ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; GFX6: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
-    ; GFX6: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY2]]
-    ; GFX9-LABEL: name: umulh_s32_vs
-    ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
-    ; GFX9: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY2]]
+    ; CHECK-LABEL: name: umulh_s32_vs
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY2]]
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $sgpr0
     %2:_(s32) = G_UMULH %0, %1
@@ -78,15 +57,10 @@ legalized: true
 body: |
   bb.0:
     liveins: $vgpr0, $vgpr1
-    ; GFX6-LABEL: name: umulh_s32_vv
-    ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
-    ; GFX6: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
-    ; GFX9-LABEL: name: umulh_s32_vv
-    ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
-    ; GFX9: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
+    ; CHECK-LABEL: name: umulh_s32_vv
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; CHECK: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %2:_(s32) = G_UMULH %0, %1
@ -1,70 +0,0 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck %s
# This would assert that a dead def should have no uses, but the dead
# def and use have different subreg indices.
---
name: multi_def_dead_reg_subreg_check
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr24_sgpr25_sgpr26_sgpr27'
scratchWaveOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr32'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
privateSegmentWaveByteOffset: { reg: '$sgpr33' }
body: |
; CHECK-LABEL: name: multi_def_dead_reg_subreg_check
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: $sgpr6_sgpr7
; CHECK: undef %0.sub3:vreg_512 = V_MOV_B32_e32 0, implicit $exec
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 0, [[V_MOV_B32_e32_]], implicit $exec
; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK: [[COPY:%[0-9]+]]:vreg_512 = COPY %0
; CHECK: bb.1:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: BUFFER_STORE_DWORD_OFFEN %0.sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, 0, 0, implicit $exec :: (store 4, align 8, addrspace 5)
; CHECK: dead %6:vgpr_32 = DS_READ_B32_gfx9 undef %7:vgpr_32, 0, 0, implicit $exec
; CHECK: dead %8:vreg_64 = DS_READ_B64_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec
; CHECK: dead %9:vreg_128 = DS_READ_B128_gfx9 [[V_ADD_U32_e32_]], 0, 0, implicit $exec
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; CHECK: undef %11.sub1:vreg_512 = COPY [[COPY]].sub1
; CHECK: INLINEASM &"", 1, 851978, def dead [[COPY1]], 851978, def dead [[COPY]].sub1, 2147483657, [[COPY1]], 2147549193, [[COPY]].sub1
; CHECK: %11.sub0:vreg_512 = COPY [[COPY]].sub0
; CHECK: %11.sub3:vreg_512 = COPY [[COPY]].sub3
; CHECK: dead %10:vgpr_32 = V_ADD_I32_e32 4, [[V_MOV_B32_e32_1]], implicit-def dead $vcc, implicit $exec
; CHECK: %11.sub2:vreg_512 = COPY undef [[V_MOV_B32_e32_]]
; CHECK: %11.sub5:vreg_512 = COPY undef [[V_MOV_B32_e32_]]
; CHECK: [[COPY2:%[0-9]+]]:vreg_512 = COPY %11
; CHECK: S_BRANCH %bb.1
bb.0:
liveins: $sgpr6_sgpr7
undef %0.sub3:vreg_512 = V_MOV_B32_e32 0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%2:vgpr_32 = V_ADD_U32_e32 0, %1, implicit $exec
%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%4:vreg_512 = COPY %0
bb.1:
BUFFER_STORE_DWORD_OFFEN %0.sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, 0, 0, implicit $exec :: (store 4, align 8, addrspace 5)
%6:vgpr_32 = DS_READ_B32_gfx9 undef %7:vgpr_32, 0, 0, implicit $exec
%8:vreg_64 = DS_READ_B64_gfx9 %1, 0, 0, implicit $exec
%9:vreg_128 = DS_READ_B128_gfx9 %2, 0, 0, implicit $exec
%10:vgpr_32 = V_ADD_I32_e32 4, %3, implicit-def dead $vcc, implicit $exec
undef %11.sub0:vreg_512 = COPY %4.sub0
%12:vgpr_32 = COPY %4.sub0
%11.sub1:vreg_512 = COPY %4.sub1
INLINEASM &"", 1, 851978, def dead %12, 851978, def dead %4.sub1, 2147483657, %12, 2147549193, %4.sub1
%11.sub2:vreg_512 = COPY undef %1
%11.sub3:vreg_512 = COPY %4.sub3
%11.sub5:vreg_512 = COPY undef %1
%4:vreg_512 = COPY %11
S_BRANCH %bb.1
...
@ -1,31 +0,0 @@
// RUN: llvm-tblgen -gen-global-isel -optimize-match-table=false -I %p/Common -I %p/../../include %s -o - < %s | FileCheck -check-prefix=GISEL %s
include "llvm/Target/Target.td"
include "GlobalISelEmitterCommon.td"
let TargetPrefix = "mytarget" in {
def int_mytarget_sleep0 : Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]>;
def int_mytarget_sleep1 : Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]>;
}
// GISEL: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS,
// GISEL-NEXT: // MIs[0] Operand 0
// GISEL-NEXT: GIM_CheckIntrinsicID, /*MI*/0, /*Op*/0, Intrinsic::mytarget_sleep0,
// GISEL-NEXT: // MIs[0] src
// GISEL-NEXT: GIM_CheckIsImm, /*MI*/0, /*Op*/1,
// GISEL-NEXT: // (intrinsic_void {{[0-9]+}}:{ *:[iPTR] }, (timm:{ *:[i32] }):$src) => (SLEEP0 (timm:{ *:[i32] }):$src)
// GISEL-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/MyTarget::SLEEP0,
// GISEL-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/1, // src
def SLEEP0 : I<(outs), (ins i32imm:$src),
[(int_mytarget_sleep0 timm:$src)]
>;
// Test for situation which was crashing in ARM patterns.
def p_imm : Operand<i32>;
def SLEEP1 : I<(outs), (ins p_imm:$src), []>;
// FIXME: This should not crash, but should it work or be an error?
// def : Pat <
// (int_mytarget_sleep1 timm:$src),
// (SLEEP1 imm:$src)
// >;
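The GISEL lines above spell out the flat match table the emitter was generating for this pattern before the revert: check the instruction opcode, check the intrinsic ID, check that operand 1 is an immediate, then build SLEEP0 and copy the operand across. Purely as an illustration of that table-driven matching style (a toy, not LLVM's InstructionSelector; every identifier below is invented), a minimal interpreter over such a stream might look like this:

// Toy interpreter for a flat, GlobalISel-style match table, shaped after the
// GISEL check lines above. Not LLVM code; opcodes, fields, and values are
// invented for illustration.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

enum ToyOpcode { CheckOpcode, CheckIntrinsicID, CheckIsImm, Accept };

struct ToyOperand { bool IsImm; int64_t Value; };
struct ToyInst { int64_t Opcode; std::vector<ToyOperand> Operands; };

// Walk the table: each entry is an opcode followed by its arguments, mirroring
// the "GIM_*, /*MI*/0, /*Op*/1, ..." layout in the checks above.
bool runTable(const std::vector<int64_t> &Table, const ToyInst &MI) {
  size_t Idx = 0;
  while (Idx < Table.size()) {
    switch (Table[Idx++]) {
    case CheckOpcode: {
      int64_t Expected = Table[Idx++];
      if (MI.Opcode != Expected)
        return false;
      break;
    }
    case CheckIntrinsicID: {
      size_t OpIdx = static_cast<size_t>(Table[Idx++]);
      int64_t ID = Table[Idx++];
      if (MI.Operands[OpIdx].Value != ID)
        return false;
      break;
    }
    case CheckIsImm: {
      size_t OpIdx = static_cast<size_t>(Table[Idx++]);
      if (!MI.Operands[OpIdx].IsImm)
        return false;
      break;
    }
    case Accept:
      return true; // all predicates held; a real selector would now build MIs
    default:
      return false; // unknown table opcode
    }
  }
  return false;
}

int main() {
  // Pretend 7 stands for G_INTRINSIC_W_SIDE_EFFECTS and 42 for the intrinsic
  // ID; operand 1 carries the immarg value as an immediate operand.
  ToyInst MI{7, {{false, 42}, {true, 15}}};
  std::vector<int64_t> Table = {CheckOpcode, 7, CheckIntrinsicID, 0, 42,
                                CheckIsImm, 1, Accept};
  std::cout << (runTable(Table, MI) ? "matched\n" : "no match\n");
  return 0;
}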
@@ -1062,7 +1062,6 @@ public:
   IPM_Opcode,
   IPM_NumOperands,
   IPM_ImmPredicate,
-  IPM_Imm,
   IPM_AtomicOrderingMMO,
   IPM_MemoryLLTSize,
   IPM_MemoryVsLLTSize,
@@ -1341,23 +1340,6 @@ public:
   }
 };
 
-class ImmOperandMatcher : public OperandPredicateMatcher {
-public:
-  ImmOperandMatcher(unsigned InsnVarID, unsigned OpIdx)
-      : OperandPredicateMatcher(IPM_Imm, InsnVarID, OpIdx) {}
-
-  static bool classof(const PredicateMatcher *P) {
-    return P->getKind() == IPM_Imm;
-  }
-
-  void emitPredicateOpcodes(MatchTable &Table,
-                            RuleMatcher &Rule) const override {
-    Table << MatchTable::Opcode("GIM_CheckIsImm") << MatchTable::Comment("MI")
-          << MatchTable::IntValue(InsnVarID) << MatchTable::Comment("Op")
-          << MatchTable::IntValue(OpIdx) << MatchTable::LineBreak;
-  }
-};
-
 /// Generates code to check that an operand is a G_CONSTANT with a particular
 /// int.
 class ConstantIntOperandMatcher : public OperandPredicateMatcher {
@@ -3812,10 +3794,6 @@ Error GlobalISelEmitter::importChildMatcher(RuleMatcher &Rule,
         OM.addPredicate<MBBOperandMatcher>();
         return Error::success();
       }
-      if (SrcChild->getOperator()->getName() == "timm") {
-        OM.addPredicate<ImmOperandMatcher>();
-        return Error::success();
-      }
     }
   }
@@ -3965,10 +3943,7 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
     // rendered as operands.
     // FIXME: The target should be able to choose sign-extended when appropriate
    //        (e.g. on Mips).
-    if (DstChild->getOperator()->getName() == "timm") {
-      DstMIBuilder.addRenderer<CopyRenderer>(DstChild->getName());
-      return InsertPt;
-    } else if (DstChild->getOperator()->getName() == "imm") {
+    if (DstChild->getOperator()->getName() == "imm") {
      DstMIBuilder.addRenderer<CopyConstantAsImmRenderer>(DstChild->getName());
      return InsertPt;
    } else if (DstChild->getOperator()->getName() == "fpimm") {