forked from OSchip/llvm-project
R600/SI: Allow f64 inline immediates in i64 operands
This requires considering the size of the operand when checking immediate legality. llvm-svn: 229135
This commit is contained in:
parent
8a9e404c0e
commit
11a4d6774b
|
@ -33,8 +33,8 @@ namespace {
|
|||
|
||||
/// \brief Helper type used in encoding
|
||||
typedef union {
|
||||
int32_t I;
|
||||
float F;
|
||||
int64_t I;
|
||||
double F;
|
||||
} IntFloatUnion;
|
||||
|
||||
class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
|
||||
|
@ -48,7 +48,7 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
|
|||
bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const;
|
||||
|
||||
/// \brief Encode an fp or int literal
|
||||
uint32_t getLitEncoding(const MCOperand &MO) const;
|
||||
uint32_t getLitEncoding(const MCOperand &MO, unsigned OpSize) const;
|
||||
|
||||
public:
|
||||
SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
|
||||
|
@ -91,51 +91,101 @@ bool SIMCCodeEmitter::isSrcOperand(const MCInstrDesc &Desc,
|
|||
OpType == AMDGPU::OPERAND_REG_INLINE_C;
|
||||
}
|
||||
|
||||
uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO) const {
|
||||
// Returns the encoding value to use if the given integer is an integer inline
|
||||
// immediate value, or 0 if it is not.
|
||||
template <typename IntTy>
|
||||
static uint32_t getIntInlineImmEncoding(IntTy Imm) {
|
||||
if (Imm >= 0 && Imm <= 64)
|
||||
return 128 + Imm;
|
||||
|
||||
IntFloatUnion Imm;
|
||||
if (MO.isImm())
|
||||
Imm.I = MO.getImm();
|
||||
else if (MO.isFPImm())
|
||||
Imm.F = MO.getFPImm();
|
||||
else if (MO.isExpr())
|
||||
return 255;
|
||||
else
|
||||
return ~0;
|
||||
if (Imm >= -16 && Imm <= -1)
|
||||
return 192 + std::abs(Imm);
|
||||
|
||||
if (Imm.I >= 0 && Imm.I <= 64)
|
||||
return 128 + Imm.I;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (Imm.I >= -16 && Imm.I <= -1)
|
||||
return 192 + abs(Imm.I);
|
||||
static uint32_t getLit32Encoding(uint32_t Val) {
|
||||
uint32_t IntImm = getIntInlineImmEncoding(static_cast<int32_t>(Val));
|
||||
if (IntImm != 0)
|
||||
return IntImm;
|
||||
|
||||
if (Imm.F == 0.5f)
|
||||
if (Val == FloatToBits(0.5f))
|
||||
return 240;
|
||||
|
||||
if (Imm.F == -0.5f)
|
||||
if (Val == FloatToBits(-0.5f))
|
||||
return 241;
|
||||
|
||||
if (Imm.F == 1.0f)
|
||||
if (Val == FloatToBits(1.0f))
|
||||
return 242;
|
||||
|
||||
if (Imm.F == -1.0f)
|
||||
if (Val == FloatToBits(-1.0f))
|
||||
return 243;
|
||||
|
||||
if (Imm.F == 2.0f)
|
||||
if (Val == FloatToBits(2.0f))
|
||||
return 244;
|
||||
|
||||
if (Imm.F == -2.0f)
|
||||
if (Val == FloatToBits(-2.0f))
|
||||
return 245;
|
||||
|
||||
if (Imm.F == 4.0f)
|
||||
if (Val == FloatToBits(4.0f))
|
||||
return 246;
|
||||
|
||||
if (Imm.F == -4.0f)
|
||||
if (Val == FloatToBits(-4.0f))
|
||||
return 247;
|
||||
|
||||
return 255;
|
||||
}
|
||||
|
||||
static uint32_t getLit64Encoding(uint64_t Val) {
|
||||
uint32_t IntImm = getIntInlineImmEncoding(static_cast<int64_t>(Val));
|
||||
if (IntImm != 0)
|
||||
return IntImm;
|
||||
|
||||
if (Val == DoubleToBits(0.5))
|
||||
return 240;
|
||||
|
||||
if (Val == DoubleToBits(-0.5))
|
||||
return 241;
|
||||
|
||||
if (Val == DoubleToBits(1.0))
|
||||
return 242;
|
||||
|
||||
if (Val == DoubleToBits(-1.0))
|
||||
return 243;
|
||||
|
||||
if (Val == DoubleToBits(2.0))
|
||||
return 244;
|
||||
|
||||
if (Val == DoubleToBits(-2.0))
|
||||
return 245;
|
||||
|
||||
if (Val == DoubleToBits(4.0))
|
||||
return 246;
|
||||
|
||||
if (Val == DoubleToBits(-4.0))
|
||||
return 247;
|
||||
|
||||
return 255;
|
||||
}
|
||||
|
||||
uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
|
||||
unsigned OpSize) const {
|
||||
if (MO.isExpr())
|
||||
return 255;
|
||||
|
||||
assert(!MO.isFPImm());
|
||||
|
||||
if (!MO.isImm())
|
||||
return ~0;
|
||||
|
||||
if (OpSize == 4)
|
||||
return getLit32Encoding(static_cast<uint32_t>(MO.getImm()));
|
||||
|
||||
assert(OpSize == 8);
|
||||
|
||||
return getLit64Encoding(static_cast<uint64_t>(MO.getImm()));
|
||||
}
|
||||
|
||||
void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
|
@ -158,9 +208,12 @@ void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
|
|||
if (!isSrcOperand(Desc, i))
|
||||
continue;
|
||||
|
||||
int RCID = Desc.OpInfo[i].RegClass;
|
||||
const MCRegisterClass &RC = MRI.getRegClass(RCID);
|
||||
|
||||
// Is this operand a literal immediate?
|
||||
const MCOperand &Op = MI.getOperand(i);
|
||||
if (getLitEncoding(Op) != 255)
|
||||
if (getLitEncoding(Op, RC.getSize()) != 255)
|
||||
continue;
|
||||
|
||||
// Yes! Encode it
|
||||
|
@ -231,7 +284,10 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
|
|||
|
||||
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
|
||||
if (isSrcOperand(Desc, OpNo)) {
|
||||
uint32_t Enc = getLitEncoding(MO);
|
||||
int RCID = Desc.OpInfo[OpNo].RegClass;
|
||||
const MCRegisterClass &RC = MRI.getRegClass(RCID);
|
||||
|
||||
uint32_t Enc = getLitEncoding(MO, RC.getSize());
|
||||
if (Enc != ~0U && (Enc != 255 || Desc.getSize() == 4))
|
||||
return Enc;
|
||||
|
||||
|
|
|
@ -172,6 +172,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
|
|||
if (!isSafeToFold(MI.getOpcode()))
|
||||
continue;
|
||||
|
||||
unsigned OpSize = TII->getOpSize(MI, 1);
|
||||
MachineOperand &OpToFold = MI.getOperand(1);
|
||||
bool FoldingImm = OpToFold.isImm();
|
||||
|
||||
|
@ -183,7 +184,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
|
|||
// Folding immediates with more than one use will increase program size.
|
||||
// FIXME: This will also reduce register usage, which may be better
|
||||
// in some cases. A better heuristic is needed.
|
||||
if (FoldingImm && !TII->isInlineConstant(OpToFold) &&
|
||||
if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
|
||||
!MRI.hasOneUse(MI.getOperand(0).getReg()))
|
||||
continue;
|
||||
|
||||
|
|
|
@ -1734,13 +1734,11 @@ int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
|
|||
static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
|
||||
|
||||
if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N)) {
|
||||
if (Node->getZExtValue() >> 32)
|
||||
return -1;
|
||||
|
||||
if (TII->isInlineConstant(Node->getAPIntValue()))
|
||||
return 0;
|
||||
|
||||
return Node->getZExtValue();
|
||||
uint64_t Val = Node->getZExtValue();
|
||||
return isUInt<32>(Val) ? Val : -1;
|
||||
}
|
||||
|
||||
if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N)) {
|
||||
|
|
|
@ -977,15 +977,25 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
|
|||
(FloatToBits(-4.0f) == Val);
|
||||
}
|
||||
|
||||
bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const {
|
||||
if (MO.isImm())
|
||||
return isInlineConstant(APInt(32, MO.getImm(), true));
|
||||
bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
|
||||
unsigned OpSize) const {
|
||||
if (MO.isImm()) {
|
||||
// MachineOperand provides no way to tell the true operand size, since it
|
||||
// only records a 64-bit value. We need to know the size to determine if a
|
||||
// 32-bit floating point immediate bit pattern is legal for an integer
|
||||
// immediate. It would be for any 32-bit integer operand, but would not be
|
||||
// for a 64-bit one.
|
||||
|
||||
unsigned BitSize = 8 * OpSize;
|
||||
return isInlineConstant(APInt(BitSize, MO.getImm(), true));
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const {
|
||||
return MO.isImm() && !isInlineConstant(MO);
|
||||
bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO,
|
||||
unsigned OpSize) const {
|
||||
return MO.isImm() && !isInlineConstant(MO, OpSize);
|
||||
}
|
||||
|
||||
static bool compareMachineOp(const MachineOperand &Op0,
|
||||
|
@ -1015,7 +1025,8 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
|
|||
if (OpInfo.RegClass < 0)
|
||||
return false;
|
||||
|
||||
if (isLiteralConstant(MO))
|
||||
unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize();
|
||||
if (isLiteralConstant(MO, OpSize))
|
||||
return RI.opCanUseLiteralConstant(OpInfo.OperandType);
|
||||
|
||||
return RI.opCanUseInlineConstant(OpInfo.OperandType);
|
||||
|
@ -1070,9 +1081,10 @@ bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
|
|||
}
|
||||
|
||||
bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
|
||||
const MachineOperand &MO) const {
|
||||
const MachineOperand &MO,
|
||||
unsigned OpSize) const {
|
||||
// Literal constants use the constant bus.
|
||||
if (isLiteralConstant(MO))
|
||||
if (isLiteralConstant(MO, OpSize))
|
||||
return true;
|
||||
|
||||
if (!MO.isReg() || !MO.isUse())
|
||||
|
@ -1134,9 +1146,13 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
|
|||
case AMDGPU::OPERAND_REG_IMM32:
|
||||
break;
|
||||
case AMDGPU::OPERAND_REG_INLINE_C:
|
||||
if (MI->getOperand(i).isImm() && !isInlineConstant(MI->getOperand(i))) {
|
||||
ErrInfo = "Illegal immediate value for operand.";
|
||||
return false;
|
||||
if (MI->getOperand(i).isImm()) {
|
||||
int RegClass = Desc.OpInfo[i].RegClass;
|
||||
const TargetRegisterClass *RC = RI.getRegClass(RegClass);
|
||||
if (!isInlineConstant(MI->getOperand(i), RC->getSize())) {
|
||||
ErrInfo = "Illegal immediate value for operand.";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case MCOI::OPERAND_IMMEDIATE:
|
||||
|
@ -1182,9 +1198,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
|
|||
for (int OpIdx : OpIndices) {
|
||||
if (OpIdx == -1)
|
||||
break;
|
||||
|
||||
const MachineOperand &MO = MI->getOperand(OpIdx);
|
||||
if (usesConstantBus(MRI, MO)) {
|
||||
if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) {
|
||||
if (MO.isReg()) {
|
||||
if (MO.getReg() != SGPRUsed)
|
||||
++ConstantBusCount;
|
||||
|
@ -1211,15 +1226,18 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
|
|||
|
||||
// Verify VOP3
|
||||
if (isVOP3(Opcode)) {
|
||||
if (Src0Idx != -1 && isLiteralConstant(MI->getOperand(Src0Idx))) {
|
||||
if (Src0Idx != -1 &&
|
||||
isLiteralConstant(MI->getOperand(Src0Idx), getOpSize(Opcode, Src0Idx))) {
|
||||
ErrInfo = "VOP3 src0 cannot be a literal constant.";
|
||||
return false;
|
||||
}
|
||||
if (Src1Idx != -1 && isLiteralConstant(MI->getOperand(Src1Idx))) {
|
||||
if (Src1Idx != -1 &&
|
||||
isLiteralConstant(MI->getOperand(Src1Idx), getOpSize(Opcode, Src1Idx))) {
|
||||
ErrInfo = "VOP3 src1 cannot be a literal constant.";
|
||||
return false;
|
||||
}
|
||||
if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) {
|
||||
if (Src2Idx != -1 &&
|
||||
isLiteralConstant(MI->getOperand(Src2Idx), getOpSize(Opcode, Src2Idx))) {
|
||||
ErrInfo = "VOP3 src2 cannot be a literal constant.";
|
||||
return false;
|
||||
}
|
||||
|
@ -1312,7 +1330,7 @@ const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
|
|||
|
||||
if (TargetRegisterInfo::isVirtualRegister(Reg))
|
||||
return MRI.getRegClass(Reg);
|
||||
return RI.getRegClass(Reg);
|
||||
return RI.getPhysRegClass(Reg);
|
||||
}
|
||||
|
||||
unsigned RCID = Desc.OpInfo[OpNo].RegClass;
|
||||
|
@ -1456,14 +1474,16 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
|
|||
if (!MO)
|
||||
MO = &MI->getOperand(OpIdx);
|
||||
|
||||
if (isVALU(InstDesc.Opcode) && usesConstantBus(MRI, *MO)) {
|
||||
if (isVALU(InstDesc.Opcode) &&
|
||||
usesConstantBus(MRI, *MO, DefinedRC->getSize())) {
|
||||
unsigned SGPRUsed =
|
||||
MO->isReg() ? MO->getReg() : (unsigned)AMDGPU::NoRegister;
|
||||
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
|
||||
if (i == OpIdx)
|
||||
continue;
|
||||
if (usesConstantBus(MRI, MI->getOperand(i)) &&
|
||||
MI->getOperand(i).isReg() && MI->getOperand(i).getReg() != SGPRUsed) {
|
||||
const MachineOperand &Op = MI->getOperand(i);
|
||||
if (Op.isReg() && Op.getReg() != SGPRUsed &&
|
||||
usesConstantBus(MRI, Op, getOpSize(*MI, i))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -1556,7 +1576,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
|
|||
// We can use one SGPR in each VOP3 instruction.
|
||||
continue;
|
||||
}
|
||||
} else if (!isLiteralConstant(MO)) {
|
||||
} else if (!isLiteralConstant(MO, getOpSize(MI->getOpcode(), Idx))) {
|
||||
// If it is not a register and not a literal constant, then it must be
|
||||
// an inline constant which is always legal.
|
||||
continue;
|
||||
|
|
|
@ -209,8 +209,8 @@ public:
|
|||
}
|
||||
|
||||
bool isInlineConstant(const APInt &Imm) const;
|
||||
bool isInlineConstant(const MachineOperand &MO) const;
|
||||
bool isLiteralConstant(const MachineOperand &MO) const;
|
||||
bool isInlineConstant(const MachineOperand &MO, unsigned OpSize) const;
|
||||
bool isLiteralConstant(const MachineOperand &MO, unsigned OpSize) const;
|
||||
|
||||
bool isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
|
||||
const MachineOperand &MO) const;
|
||||
|
@ -225,7 +225,8 @@ public:
|
|||
|
||||
/// \brief Returns true if this operand uses the constant bus.
|
||||
bool usesConstantBus(const MachineRegisterInfo &MRI,
|
||||
const MachineOperand &MO) const;
|
||||
const MachineOperand &MO,
|
||||
unsigned OpSize) const;
|
||||
|
||||
/// \brief Return true if this instruction has any modifiers.
|
||||
/// e.g. src[012]_mod, omod, clamp.
|
||||
|
@ -247,7 +248,20 @@ public:
|
|||
/// the register class of its machine operand.
|
||||
/// to infer the correct register class based on the other operands.
|
||||
const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
|
||||
unsigned OpNo) const;\
|
||||
unsigned OpNo) const;
|
||||
|
||||
/// \brief Return the size in bytes of the operand OpNo on the given
|
||||
/// instruction opcode.
|
||||
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
|
||||
const MCOperandInfo &OpInfo = get(Opcode).OpInfo[OpNo];
|
||||
return RI.getRegClass(OpInfo.RegClass)->getSize();
|
||||
}
|
||||
|
||||
/// \brief This form should usually be preferred since it handles operands
|
||||
/// with unknown register classes.
|
||||
unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
|
||||
return getOpRegClass(MI, OpNo)->getSize();
|
||||
}
|
||||
|
||||
/// \returns true if it is legal for the operand at index \p OpNo
|
||||
/// to read a VGPR.
|
||||
|
|
|
@ -127,30 +127,31 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
|
|||
TII->isVOPC(MI.getOpcode()));
|
||||
|
||||
const SIRegisterInfo &TRI = TII->getRegisterInfo();
|
||||
MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
|
||||
int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
|
||||
MachineOperand &Src0 = MI.getOperand(Src0Idx);
|
||||
|
||||
// Only one literal constant is allowed per instruction, so if src0 is a
|
||||
// literal constant then we can't do any folding.
|
||||
if (Src0->isImm() && TII->isLiteralConstant(*Src0))
|
||||
if (Src0.isImm() &&
|
||||
TII->isLiteralConstant(Src0, TII->getOpSize(MI, Src0Idx)))
|
||||
return;
|
||||
|
||||
|
||||
// Literal constants and SGPRs can only be used in Src0, so if Src0 is an
|
||||
// SGPR, we cannot commute the instruction, so we can't fold any literal
|
||||
// constants.
|
||||
if (Src0->isReg() && !isVGPR(Src0, TRI, MRI))
|
||||
if (Src0.isReg() && !isVGPR(&Src0, TRI, MRI))
|
||||
return;
|
||||
|
||||
// Try to fold Src0
|
||||
if (Src0->isReg()) {
|
||||
unsigned Reg = Src0->getReg();
|
||||
if (Src0.isReg()) {
|
||||
unsigned Reg = Src0.getReg();
|
||||
MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
|
||||
if (Def && Def->isMoveImmediate()) {
|
||||
MachineOperand &MovSrc = Def->getOperand(1);
|
||||
bool ConstantFolded = false;
|
||||
|
||||
if (MovSrc.isImm() && isUInt<32>(MovSrc.getImm())) {
|
||||
Src0->ChangeToImmediate(MovSrc.getImm());
|
||||
Src0.ChangeToImmediate(MovSrc.getImm());
|
||||
ConstantFolded = true;
|
||||
}
|
||||
if (ConstantFolded) {
|
||||
|
@ -189,7 +190,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
|
|||
const MachineOperand &Src = MI.getOperand(1);
|
||||
|
||||
if (Src.isImm()) {
|
||||
if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src))
|
||||
if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4))
|
||||
MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
|
||||
}
|
||||
|
||||
|
|
|
@ -64,8 +64,8 @@ define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addr
|
|||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_and_constant_i32:
|
||||
; SI: v_and_b32
|
||||
; FUNC-LABEL: {{^}}v_and_constant_i32
|
||||
; SI: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, v{{[0-9]+}}
|
||||
define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
|
||||
%a = load i32 addrspace(1)* %aptr, align 4
|
||||
%and = and i32 %a, 1234567
|
||||
|
@ -73,7 +73,25 @@ define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr)
|
|||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_and_i64:
|
||||
; FUNC-LABEL: {{^}}v_and_inline_imm_64_i32
|
||||
; SI: v_and_b32_e32 v{{[0-9]+}}, 64, v{{[0-9]+}}
|
||||
define void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
|
||||
%a = load i32 addrspace(1)* %aptr, align 4
|
||||
%and = and i32 %a, 64
|
||||
store i32 %and, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_and_inline_imm_neg_16_i32
|
||||
; SI: v_and_b32_e32 v{{[0-9]+}}, -16, v{{[0-9]+}}
|
||||
define void @v_and_inline_imm_neg_16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
|
||||
%a = load i32 addrspace(1)* %aptr, align 4
|
||||
%and = and i32 %a, -16
|
||||
store i32 %and, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_and_i64
|
||||
; SI: s_and_b64
|
||||
define void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
|
||||
%and = and i64 %a, %b
|
||||
|
@ -90,8 +108,8 @@ define void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_and_constant_i64:
|
||||
; SI: s_and_b64
|
||||
; FUNC-LABEL: {{^}}s_and_constant_i64
|
||||
; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
|
||||
%and = and i64 %a, 281474976710655
|
||||
store i64 %and, i64 addrspace(1)* %out, align 8
|
||||
|
@ -150,10 +168,129 @@ define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %apt
|
|||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_and_inline_imm_i64:
|
||||
; FUNC-LABEL: {{^}}s_and_inline_imm_64_i64
|
||||
; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 64
|
||||
define void @s_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
|
||||
define void @s_and_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
|
||||
%and = and i64 %a, 64
|
||||
store i64 %and, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_and_inline_imm_1_i64
|
||||
; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 1
|
||||
define void @s_and_inline_imm_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
|
||||
%and = and i64 %a, 1
|
||||
store i64 %and, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_and_inline_imm_1.0_i64
|
||||
; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 1.0
|
||||
define void @s_and_inline_imm_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
|
||||
%and = and i64 %a, 4607182418800017408
|
||||
store i64 %and, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_and_inline_imm_neg_1.0_i64
|
||||
; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -1.0
|
||||
define void @s_and_inline_imm_neg_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
|
||||
%and = and i64 %a, 13830554455654793216
|
||||
store i64 %and, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_and_inline_imm_0.5_i64
|
||||
; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0.5
|
||||
define void @s_and_inline_imm_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
|
||||
%and = and i64 %a, 4602678819172646912
|
||||
store i64 %and, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_and_inline_imm_neg_0.5_i64
|
||||
; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -0.5
|
||||
define void @s_and_inline_imm_neg_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
|
||||
%and = and i64 %a, 13826050856027422720
|
||||
store i64 %and, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_and_inline_imm_2.0_i64
|
||||
; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 2.0
|
||||
define void @s_and_inline_imm_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
|
||||
%and = and i64 %a, 4611686018427387904
|
||||
store i64 %and, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_and_inline_imm_neg_2.0_i64
|
||||
; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -2.0
|
||||
define void @s_and_inline_imm_neg_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
|
||||
%and = and i64 %a, 13835058055282163712
|
||||
store i64 %and, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_and_inline_imm_4.0_i64
|
||||
; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 4.0
|
||||
define void @s_and_inline_imm_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
|
||||
%and = and i64 %a, 4616189618054758400
|
||||
store i64 %and, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_and_inline_imm_neg_4.0_i64
|
||||
; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -4.0
|
||||
define void @s_and_inline_imm_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
|
||||
%and = and i64 %a, 13839561654909534208
|
||||
store i64 %and, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; Test with the 64-bit integer bitpattern for a 32-bit float in the
|
||||
; low 32-bits, which is not a valid 64-bit inline immediate.
|
||||
|
||||
; FUNC-LABEL: {{^}}s_and_inline_imm_f32_4.0_i64
|
||||
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 4.0
|
||||
; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0{{$}}
|
||||
; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
|
||||
define void @s_and_inline_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
|
||||
%and = and i64 %a, 1082130432
|
||||
store i64 %and, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: Copy of -1 register
|
||||
; FUNC-LABEL: {{^}}s_and_inline_imm_f32_neg_4.0_i64
|
||||
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], -4.0
|
||||
; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], -1{{$}}
|
||||
; SI-DAG: s_mov_b32 s[[K_HI_COPY:[0-9]+]], s[[K_HI]]
|
||||
; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI_COPY]]{{\]}}
|
||||
define void @s_and_inline_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
|
||||
%and = and i64 %a, -1065353216
|
||||
store i64 %and, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; Shift into upper 32-bits
|
||||
; FUNC-LABEL: {{^}}s_and_inline_high_imm_f32_4.0_i64
|
||||
; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 4.0
|
||||
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}}
|
||||
; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
|
||||
define void @s_and_inline_high_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
|
||||
%and = and i64 %a, 4647714815446351872
|
||||
store i64 %and, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_and_inline_high_imm_f32_neg_4.0_i64
|
||||
; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], -4.0
|
||||
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}}
|
||||
; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
|
||||
define void @s_and_inline_high_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
|
||||
%and = and i64 %a, 13871086852301127680
|
||||
store i64 %and, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -23,7 +23,26 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_0.0_f32
|
||||
; CHECK-LABEL: {{^}}store_imm_neg_0.0_i64:
|
||||
; CHECK-DAG: s_mov_b32 s[[HI_SREG:[0-9]+]], 0x80000000
|
||||
; CHECK-DAG: s_mov_b32 s[[LO_SREG:[0-9]+]], 0{{$}}
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
|
||||
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
|
||||
define void @store_imm_neg_0.0_i64(i64 addrspace(1) *%out) {
|
||||
store i64 -9223372036854775808, i64 addrspace(1) *%out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_neg_0.0_i32:
|
||||
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
|
||||
; CHECK-NEXT: buffer_store_dword [[REG]]
|
||||
define void @store_inline_imm_neg_0.0_i32(i32 addrspace(1)* %out) {
|
||||
store i32 -2147483648, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_0.0_f32:
|
||||
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
define void @store_inline_imm_0.0_f32(float addrspace(1)* %out) {
|
||||
|
@ -31,7 +50,7 @@ define void @store_inline_imm_0.0_f32(float addrspace(1)* %out) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_imm_neg_0.0_f32
|
||||
; CHECK-LABEL: {{^}}store_imm_neg_0.0_f32:
|
||||
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
define void @store_imm_neg_0.0_f32(float addrspace(1)* %out) {
|
||||
|
@ -39,7 +58,7 @@ define void @store_imm_neg_0.0_f32(float addrspace(1)* %out) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_0.5_f32
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_0.5_f32:
|
||||
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0.5{{$}}
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
define void @store_inline_imm_0.5_f32(float addrspace(1)* %out) {
|
||||
|
@ -47,7 +66,7 @@ define void @store_inline_imm_0.5_f32(float addrspace(1)* %out) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_m_0.5_f32
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_m_0.5_f32:
|
||||
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -0.5{{$}}
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
define void @store_inline_imm_m_0.5_f32(float addrspace(1)* %out) {
|
||||
|
@ -55,7 +74,7 @@ define void @store_inline_imm_m_0.5_f32(float addrspace(1)* %out) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_1.0_f32
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_1.0_f32:
|
||||
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0{{$}}
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
define void @store_inline_imm_1.0_f32(float addrspace(1)* %out) {
|
||||
|
@ -63,7 +82,7 @@ define void @store_inline_imm_1.0_f32(float addrspace(1)* %out) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_m_1.0_f32
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_m_1.0_f32:
|
||||
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -1.0{{$}}
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
define void @store_inline_imm_m_1.0_f32(float addrspace(1)* %out) {
|
||||
|
@ -71,7 +90,7 @@ define void @store_inline_imm_m_1.0_f32(float addrspace(1)* %out) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_2.0_f32
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_2.0_f32:
|
||||
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0{{$}}
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
define void @store_inline_imm_2.0_f32(float addrspace(1)* %out) {
|
||||
|
@ -79,7 +98,7 @@ define void @store_inline_imm_2.0_f32(float addrspace(1)* %out) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_m_2.0_f32
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_m_2.0_f32:
|
||||
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -2.0{{$}}
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
define void @store_inline_imm_m_2.0_f32(float addrspace(1)* %out) {
|
||||
|
@ -87,7 +106,7 @@ define void @store_inline_imm_m_2.0_f32(float addrspace(1)* %out) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_4.0_f32
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_4.0_f32:
|
||||
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 4.0{{$}}
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
define void @store_inline_imm_4.0_f32(float addrspace(1)* %out) {
|
||||
|
@ -95,7 +114,7 @@ define void @store_inline_imm_4.0_f32(float addrspace(1)* %out) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_m_4.0_f32
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_m_4.0_f32:
|
||||
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -4.0{{$}}
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
define void @store_inline_imm_m_4.0_f32(float addrspace(1)* %out) {
|
||||
|
@ -111,7 +130,7 @@ define void @store_literal_imm_f32(float addrspace(1)* %out) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_0.0_f32
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_0.0_f32:
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0, [[VAL]]{{$}}
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
|
@ -121,7 +140,7 @@ define void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_0.5_f32
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_0.5_f32:
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0.5, [[VAL]]{{$}}
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
|
@ -131,7 +150,7 @@ define void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f32
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f32:
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -0.5, [[VAL]]{{$}}
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
|
@ -141,7 +160,7 @@ define void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_1.0_f32
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_1.0_f32:
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 1.0, [[VAL]]{{$}}
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
|
@ -151,7 +170,7 @@ define void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f32
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f32:
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -1.0, [[VAL]]{{$}}
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
|
@ -161,7 +180,7 @@ define void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_2.0_f32
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_2.0_f32:
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, [[VAL]]{{$}}
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
|
@ -171,7 +190,7 @@ define void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f32
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f32:
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -2.0, [[VAL]]{{$}}
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
|
@ -181,7 +200,7 @@ define void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_4.0_f32
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_4.0_f32:
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 4.0, [[VAL]]{{$}}
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
|
@ -191,7 +210,7 @@ define void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f32
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f32:
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -4.0, [[VAL]]{{$}}
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
|
@ -201,7 +220,7 @@ define void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @commute_add_inline_imm_0.5_f32
|
||||
; CHECK-LABEL: {{^}}commute_add_inline_imm_0.5_f32:
|
||||
; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
|
||||
; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0.5, [[VAL]]
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
|
@ -212,7 +231,7 @@ define void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addr
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @commute_add_literal_f32
|
||||
; CHECK-LABEL: {{^}}commute_add_literal_f32:
|
||||
; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
|
||||
; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0x44800000, [[VAL]]
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
|
@ -223,7 +242,7 @@ define void @commute_add_literal_f32(float addrspace(1)* %out, float addrspace(1
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_1_f32
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_1_f32:
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 1, [[VAL]]{{$}}
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
|
@ -233,7 +252,7 @@ define void @add_inline_imm_1_f32(float addrspace(1)* %out, float %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_2_f32
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_2_f32:
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 2, [[VAL]]{{$}}
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
|
@ -243,7 +262,7 @@ define void @add_inline_imm_2_f32(float addrspace(1)* %out, float %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_16_f32
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_16_f32:
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 16, [[VAL]]
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
|
@ -253,7 +272,7 @@ define void @add_inline_imm_16_f32(float addrspace(1)* %out, float %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f32
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f32:
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -1, [[VAL]]
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
|
@ -263,7 +282,7 @@ define void @add_inline_imm_neg_1_f32(float addrspace(1)* %out, float %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f32
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f32:
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -2, [[VAL]]
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
|
@ -273,7 +292,7 @@ define void @add_inline_imm_neg_2_f32(float addrspace(1)* %out, float %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f32
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f32:
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -16, [[VAL]]
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
|
@ -283,7 +302,7 @@ define void @add_inline_imm_neg_16_f32(float addrspace(1)* %out, float %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_63_f32
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_63_f32:
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 63, [[VAL]]
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
|
@ -293,7 +312,7 @@ define void @add_inline_imm_63_f32(float addrspace(1)* %out, float %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_64_f32
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_64_f32:
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 64, [[VAL]]
|
||||
; CHECK: buffer_store_dword [[REG]]
|
||||
|
@ -303,7 +322,8 @@ define void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_0.0_f64
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_0.0_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
|
||||
|
@ -314,7 +334,7 @@ define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_0.5_f64
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_0.5_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0.5, [[VAL]]
|
||||
|
@ -325,7 +345,7 @@ define void @add_inline_imm_0.5_f64(double addrspace(1)* %out, double %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f64
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -0.5, [[VAL]]
|
||||
|
@ -336,7 +356,7 @@ define void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, double %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_1.0_f64
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_1.0_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1.0, [[VAL]]
|
||||
|
@ -347,7 +367,7 @@ define void @add_inline_imm_1.0_f64(double addrspace(1)* %out, double %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f64
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1.0, [[VAL]]
|
||||
|
@ -358,7 +378,7 @@ define void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, double %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_2.0_f64
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_2.0_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2.0, [[VAL]]
|
||||
|
@ -369,7 +389,7 @@ define void @add_inline_imm_2.0_f64(double addrspace(1)* %out, double %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f64
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2.0, [[VAL]]
|
||||
|
@ -380,7 +400,7 @@ define void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, double %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_4.0_f64
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_4.0_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 4.0, [[VAL]]
|
||||
|
@ -391,7 +411,7 @@ define void @add_inline_imm_4.0_f64(double addrspace(1)* %out, double %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f64
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -4.0, [[VAL]]
|
||||
|
@ -403,7 +423,7 @@ define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) {
|
|||
}
|
||||
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_1_f64
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_1_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1, [[VAL]]
|
||||
|
@ -414,7 +434,7 @@ define void @add_inline_imm_1_f64(double addrspace(1)* %out, double %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_2_f64
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_2_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2, [[VAL]]
|
||||
|
@ -425,7 +445,7 @@ define void @add_inline_imm_2_f64(double addrspace(1)* %out, double %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_16_f64
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_16_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 16, [[VAL]]
|
||||
|
@ -436,7 +456,7 @@ define void @add_inline_imm_16_f64(double addrspace(1)* %out, double %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f64
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1, [[VAL]]
|
||||
|
@ -447,7 +467,7 @@ define void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, double %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f64
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2, [[VAL]]
|
||||
|
@ -458,7 +478,7 @@ define void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, double %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f64
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -16, [[VAL]]
|
||||
|
@ -469,7 +489,7 @@ define void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, double %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_63_f64
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_63_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 63, [[VAL]]
|
||||
|
@ -480,7 +500,7 @@ define void @add_inline_imm_63_f64(double addrspace(1)* %out, double %x) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_64_f64
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_64_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 64, [[VAL]]
|
||||
|
@ -492,7 +512,7 @@ define void @add_inline_imm_64_f64(double addrspace(1)* %out, double %x) {
|
|||
}
|
||||
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_0.0_f64
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_0.0_f64:
|
||||
; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0
|
||||
; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0
|
||||
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
|
||||
|
@ -500,3 +520,98 @@ define void @store_inline_imm_0.0_f64(double addrspace(1)* %out) {
|
|||
store double 0.0, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; CHECK-LABEL: {{^}}store_literal_imm_neg_0.0_f64:
|
||||
; CHECK-DAG: s_mov_b32 s[[HI_SREG:[0-9]+]], 0x80000000
|
||||
; CHECK-DAG: s_mov_b32 s[[LO_SREG:[0-9]+]], 0{{$}}
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
|
||||
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
|
||||
define void @store_literal_imm_neg_0.0_f64(double addrspace(1)* %out) {
|
||||
store double -0.0, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_0.5_f64:
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3fe00000
|
||||
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
|
||||
define void @store_inline_imm_0.5_f64(double addrspace(1)* %out) {
|
||||
store double 0.5, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_m_0.5_f64:
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbfe00000
|
||||
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
|
||||
define void @store_inline_imm_m_0.5_f64(double addrspace(1)* %out) {
|
||||
store double -0.5, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_1.0_f64:
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3ff00000
|
||||
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
|
||||
define void @store_inline_imm_1.0_f64(double addrspace(1)* %out) {
|
||||
store double 1.0, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_m_1.0_f64:
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbff00000
|
||||
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
|
||||
define void @store_inline_imm_m_1.0_f64(double addrspace(1)* %out) {
|
||||
store double -1.0, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_2.0_f64:
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 2.0
|
||||
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
|
||||
define void @store_inline_imm_2.0_f64(double addrspace(1)* %out) {
|
||||
store double 2.0, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_m_2.0_f64:
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], -2.0
|
||||
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
|
||||
define void @store_inline_imm_m_2.0_f64(double addrspace(1)* %out) {
|
||||
store double -2.0, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_4.0_f64:
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x40100000
|
||||
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
|
||||
define void @store_inline_imm_4.0_f64(double addrspace(1)* %out) {
|
||||
store double 4.0, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_m_4.0_f64:
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xc0100000
|
||||
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
|
||||
define void @store_inline_imm_m_4.0_f64(double addrspace(1)* %out) {
|
||||
store double -4.0, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_literal_imm_f64:
|
||||
; CHECK-DAG: s_mov_b32 s[[HI_SREG:[0-9]+]], 0x40b00000
|
||||
; CHECK-DAG: s_mov_b32 s[[LO_SREG:[0-9]+]], 0{{$}}
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
|
||||
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
|
||||
define void @store_literal_imm_f64(double addrspace(1)* %out) {
|
||||
store double 4096.0, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -49,3 +49,20 @@ define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspa
|
|||
store i32 %trunc, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}v_select_i64_split_imm:
|
||||
; CHECK: s_mov_b32 [[SHI:s[0-9]+]], 63
|
||||
; CHECK: s_mov_b32 [[SLO:s[0-9]+]], 0
|
||||
; CHECK-DAG: v_mov_b32_e32 [[VHI:v[0-9]+]], [[SHI]]
|
||||
; CHECK-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], [[SLO]]
|
||||
; CHECK-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, [[VLO]], {{v[0-9]+}}
|
||||
; CHECK-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, [[VHI]], {{v[0-9]+}}
|
||||
; CHECK: s_endpgm
|
||||
define void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
|
||||
%cmp = icmp ugt i32 %cond, 5
|
||||
%a = load i64 addrspace(1)* %aptr, align 8
|
||||
%b = load i64 addrspace(1)* %bptr, align 8
|
||||
%sel = select i1 %cmp, i64 %a, i64 270582939648 ; 63 << 32
|
||||
store i64 %sel, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -10,12 +10,13 @@ define void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; FIXME: select on 0, 0
|
||||
; SI-LABEL: {{^}}sint_to_fp_i1_f64:
|
||||
; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
|
||||
; We can't fold the SGPRs into v_cndmask_b32_e64, because it already
|
||||
; uses an SGPR for [[CMP]]
|
||||
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
|
||||
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
|
||||
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, [[CMP]]
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: s_endpgm
|
||||
define void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
|
||||
|
|
|
@ -70,12 +70,13 @@ define void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i
|
|||
ret void
|
||||
}
|
||||
|
||||
; FIXME: select on 0, 0
|
||||
; SI-LABEL: {{^}}uint_to_fp_i1_to_f64:
|
||||
; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
|
||||
; We can't fold the SGPRs into v_cndmask_b32_e64, because it already
|
||||
; uses an SGPR for [[CMP]]
|
||||
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
|
||||
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
|
||||
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, [[CMP]]
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: s_endpgm
|
||||
define void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 %in) {
|
||||
|
|
Loading…
Reference in New Issue