From 11a4d6774b7c98c55f225a02f982a91c781768d1 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 13 Feb 2015 19:05:03 +0000 Subject: [PATCH] R600/SI: Allow f64 inline immediates in i64 operands This requires considering the size of the operand when checking immediate legality. llvm-svn: 229135 --- .../R600/MCTargetDesc/SIMCCodeEmitter.cpp | 110 ++++++--- llvm/lib/Target/R600/SIFoldOperands.cpp | 3 +- llvm/lib/Target/R600/SIISelLowering.cpp | 6 +- llvm/lib/Target/R600/SIInstrInfo.cpp | 62 ++++-- llvm/lib/Target/R600/SIInstrInfo.h | 22 +- llvm/lib/Target/R600/SIShrinkInstructions.cpp | 17 +- llvm/test/CodeGen/R600/and.ll | 151 ++++++++++++- llvm/test/CodeGen/R600/imm.ll | 209 ++++++++++++++---- llvm/test/CodeGen/R600/select64.ll | 17 ++ llvm/test/CodeGen/R600/sint_to_fp.f64.ll | 3 +- llvm/test/CodeGen/R600/uint_to_fp.f64.ll | 3 +- 11 files changed, 482 insertions(+), 121 deletions(-) diff --git a/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp index 640de3f9fc84..12aaaa7c7aad 100644 --- a/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp @@ -33,8 +33,8 @@ namespace { /// \brief Helper type used in encoding typedef union { - int32_t I; - float F; + int64_t I; + double F; } IntFloatUnion; class SIMCCodeEmitter : public AMDGPUMCCodeEmitter { @@ -48,7 +48,7 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter { bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const; /// \brief Encode an fp or int literal - uint32_t getLitEncoding(const MCOperand &MO) const; + uint32_t getLitEncoding(const MCOperand &MO, unsigned OpSize) const; public: SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri, @@ -91,51 +91,101 @@ bool SIMCCodeEmitter::isSrcOperand(const MCInstrDesc &Desc, OpType == AMDGPU::OPERAND_REG_INLINE_C; } -uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO) const { +// Returns the encoding value to use if 
the given integer is an integer inline +// immediate value, or 0 if it is not. +template +static uint32_t getIntInlineImmEncoding(IntTy Imm) { + if (Imm >= 0 && Imm <= 64) + return 128 + Imm; - IntFloatUnion Imm; - if (MO.isImm()) - Imm.I = MO.getImm(); - else if (MO.isFPImm()) - Imm.F = MO.getFPImm(); - else if (MO.isExpr()) - return 255; - else - return ~0; + if (Imm >= -16 && Imm <= -1) + return 192 + std::abs(Imm); - if (Imm.I >= 0 && Imm.I <= 64) - return 128 + Imm.I; + return 0; +} - if (Imm.I >= -16 && Imm.I <= -1) - return 192 + abs(Imm.I); +static uint32_t getLit32Encoding(uint32_t Val) { + uint32_t IntImm = getIntInlineImmEncoding(static_cast(Val)); + if (IntImm != 0) + return IntImm; - if (Imm.F == 0.5f) + if (Val == FloatToBits(0.5f)) return 240; - if (Imm.F == -0.5f) + if (Val == FloatToBits(-0.5f)) return 241; - if (Imm.F == 1.0f) + if (Val == FloatToBits(1.0f)) return 242; - if (Imm.F == -1.0f) + if (Val == FloatToBits(-1.0f)) return 243; - if (Imm.F == 2.0f) + if (Val == FloatToBits(2.0f)) return 244; - if (Imm.F == -2.0f) + if (Val == FloatToBits(-2.0f)) return 245; - if (Imm.F == 4.0f) + if (Val == FloatToBits(4.0f)) return 246; - if (Imm.F == -4.0f) + if (Val == FloatToBits(-4.0f)) return 247; return 255; } +static uint32_t getLit64Encoding(uint64_t Val) { + uint32_t IntImm = getIntInlineImmEncoding(static_cast(Val)); + if (IntImm != 0) + return IntImm; + + if (Val == DoubleToBits(0.5)) + return 240; + + if (Val == DoubleToBits(-0.5)) + return 241; + + if (Val == DoubleToBits(1.0)) + return 242; + + if (Val == DoubleToBits(-1.0)) + return 243; + + if (Val == DoubleToBits(2.0)) + return 244; + + if (Val == DoubleToBits(-2.0)) + return 245; + + if (Val == DoubleToBits(4.0)) + return 246; + + if (Val == DoubleToBits(-4.0)) + return 247; + + return 255; +} + +uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO, + unsigned OpSize) const { + if (MO.isExpr()) + return 255; + + assert(!MO.isFPImm()); + + if (!MO.isImm()) + return ~0; + + if 
(OpSize == 4) + return getLit32Encoding(static_cast(MO.getImm())); + + assert(OpSize == 8); + + return getLit64Encoding(static_cast(MO.getImm())); +} + void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { @@ -158,9 +208,12 @@ void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, if (!isSrcOperand(Desc, i)) continue; + int RCID = Desc.OpInfo[i].RegClass; + const MCRegisterClass &RC = MRI.getRegClass(RCID); + // Is this operand a literal immediate? const MCOperand &Op = MI.getOperand(i); - if (getLitEncoding(Op) != 255) + if (getLitEncoding(Op, RC.getSize()) != 255) continue; // Yes! Encode it @@ -231,7 +284,10 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); if (isSrcOperand(Desc, OpNo)) { - uint32_t Enc = getLitEncoding(MO); + int RCID = Desc.OpInfo[OpNo].RegClass; + const MCRegisterClass &RC = MRI.getRegClass(RCID); + + uint32_t Enc = getLitEncoding(MO, RC.getSize()); if (Enc != ~0U && (Enc != 255 || Desc.getSize() == 4)) return Enc; diff --git a/llvm/lib/Target/R600/SIFoldOperands.cpp b/llvm/lib/Target/R600/SIFoldOperands.cpp index 64f1b3d8cfdd..848638fae797 100644 --- a/llvm/lib/Target/R600/SIFoldOperands.cpp +++ b/llvm/lib/Target/R600/SIFoldOperands.cpp @@ -172,6 +172,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { if (!isSafeToFold(MI.getOpcode())) continue; + unsigned OpSize = TII->getOpSize(MI, 1); MachineOperand &OpToFold = MI.getOperand(1); bool FoldingImm = OpToFold.isImm(); @@ -183,7 +184,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { // Folding immediates with more than one use will increase program size. // FIXME: This will also reduce register usage, which may be better // in some cases. A better heuristic is needed. 
- if (FoldingImm && !TII->isInlineConstant(OpToFold) && + if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) && !MRI.hasOneUse(MI.getOperand(0).getReg())) continue; diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp index d8cee5ae4282..141ba80492f8 100644 --- a/llvm/lib/Target/R600/SIISelLowering.cpp +++ b/llvm/lib/Target/R600/SIISelLowering.cpp @@ -1734,13 +1734,11 @@ int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const { static_cast(Subtarget->getInstrInfo()); if (const ConstantSDNode *Node = dyn_cast(N)) { - if (Node->getZExtValue() >> 32) - return -1; - if (TII->isInlineConstant(Node->getAPIntValue())) return 0; - return Node->getZExtValue(); + uint64_t Val = Node->getZExtValue(); + return isUInt<32>(Val) ? Val : -1; } if (const ConstantFPSDNode *Node = dyn_cast(N)) { diff --git a/llvm/lib/Target/R600/SIInstrInfo.cpp b/llvm/lib/Target/R600/SIInstrInfo.cpp index 88a6677e7b24..7762b6eccf88 100644 --- a/llvm/lib/Target/R600/SIInstrInfo.cpp +++ b/llvm/lib/Target/R600/SIInstrInfo.cpp @@ -977,15 +977,25 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { (FloatToBits(-4.0f) == Val); } -bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const { - if (MO.isImm()) - return isInlineConstant(APInt(32, MO.getImm(), true)); +bool SIInstrInfo::isInlineConstant(const MachineOperand &MO, + unsigned OpSize) const { + if (MO.isImm()) { + // MachineOperand provides no way to tell the true operand size, since it + // only records a 64-bit value. We need to know the size to determine if a + // 32-bit floating point immediate bit pattern is legal for an integer + // immediate. It would be for any 32-bit integer operand, but would not be + // for a 64-bit one. 
+ + unsigned BitSize = 8 * OpSize; + return isInlineConstant(APInt(BitSize, MO.getImm(), true)); + } return false; } -bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const { - return MO.isImm() && !isInlineConstant(MO); +bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO, + unsigned OpSize) const { + return MO.isImm() && !isInlineConstant(MO, OpSize); } static bool compareMachineOp(const MachineOperand &Op0, @@ -1015,7 +1025,8 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo, if (OpInfo.RegClass < 0) return false; - if (isLiteralConstant(MO)) + unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize(); + if (isLiteralConstant(MO, OpSize)) return RI.opCanUseLiteralConstant(OpInfo.OperandType); return RI.opCanUseInlineConstant(OpInfo.OperandType); @@ -1070,9 +1081,10 @@ bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI, } bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI, - const MachineOperand &MO) const { + const MachineOperand &MO, + unsigned OpSize) const { // Literal constants use the constant bus. 
- if (isLiteralConstant(MO)) + if (isLiteralConstant(MO, OpSize)) return true; if (!MO.isReg() || !MO.isUse()) @@ -1134,9 +1146,13 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, case AMDGPU::OPERAND_REG_IMM32: break; case AMDGPU::OPERAND_REG_INLINE_C: - if (MI->getOperand(i).isImm() && !isInlineConstant(MI->getOperand(i))) { - ErrInfo = "Illegal immediate value for operand."; - return false; + if (MI->getOperand(i).isImm()) { + int RegClass = Desc.OpInfo[i].RegClass; + const TargetRegisterClass *RC = RI.getRegClass(RegClass); + if (!isInlineConstant(MI->getOperand(i), RC->getSize())) { + ErrInfo = "Illegal immediate value for operand."; + return false; + } } break; case MCOI::OPERAND_IMMEDIATE: @@ -1182,9 +1198,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, for (int OpIdx : OpIndices) { if (OpIdx == -1) break; - const MachineOperand &MO = MI->getOperand(OpIdx); - if (usesConstantBus(MRI, MO)) { + if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) { if (MO.isReg()) { if (MO.getReg() != SGPRUsed) ++ConstantBusCount; @@ -1211,15 +1226,18 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, // Verify VOP3 if (isVOP3(Opcode)) { - if (Src0Idx != -1 && isLiteralConstant(MI->getOperand(Src0Idx))) { + if (Src0Idx != -1 && + isLiteralConstant(MI->getOperand(Src0Idx), getOpSize(Opcode, Src0Idx))) { ErrInfo = "VOP3 src0 cannot be a literal constant."; return false; } - if (Src1Idx != -1 && isLiteralConstant(MI->getOperand(Src1Idx))) { + if (Src1Idx != -1 && + isLiteralConstant(MI->getOperand(Src1Idx), getOpSize(Opcode, Src1Idx))) { ErrInfo = "VOP3 src1 cannot be a literal constant."; return false; } - if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) { + if (Src2Idx != -1 && + isLiteralConstant(MI->getOperand(Src2Idx), getOpSize(Opcode, Src2Idx))) { ErrInfo = "VOP3 src2 cannot be a literal constant."; return false; } @@ -1312,7 +1330,7 @@ const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr 
&MI, if (TargetRegisterInfo::isVirtualRegister(Reg)) return MRI.getRegClass(Reg); - return RI.getRegClass(Reg); + return RI.getPhysRegClass(Reg); } unsigned RCID = Desc.OpInfo[OpNo].RegClass; @@ -1456,14 +1474,16 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx, if (!MO) MO = &MI->getOperand(OpIdx); - if (isVALU(InstDesc.Opcode) && usesConstantBus(MRI, *MO)) { + if (isVALU(InstDesc.Opcode) && + usesConstantBus(MRI, *MO, DefinedRC->getSize())) { unsigned SGPRUsed = MO->isReg() ? MO->getReg() : (unsigned)AMDGPU::NoRegister; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { if (i == OpIdx) continue; - if (usesConstantBus(MRI, MI->getOperand(i)) && - MI->getOperand(i).isReg() && MI->getOperand(i).getReg() != SGPRUsed) { + const MachineOperand &Op = MI->getOperand(i); + if (Op.isReg() && Op.getReg() != SGPRUsed && + usesConstantBus(MRI, Op, getOpSize(*MI, i))) { return false; } } @@ -1556,7 +1576,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { // We can use one SGPR in each VOP3 instruction. continue; } - } else if (!isLiteralConstant(MO)) { + } else if (!isLiteralConstant(MO, getOpSize(MI->getOpcode(), Idx))) { // If it is not a register and not a literal constant, then it must be // an inline constant which is always legal. 
continue; diff --git a/llvm/lib/Target/R600/SIInstrInfo.h b/llvm/lib/Target/R600/SIInstrInfo.h index b25e35e3add0..f3285cff6c4f 100644 --- a/llvm/lib/Target/R600/SIInstrInfo.h +++ b/llvm/lib/Target/R600/SIInstrInfo.h @@ -209,8 +209,8 @@ public: } bool isInlineConstant(const APInt &Imm) const; - bool isInlineConstant(const MachineOperand &MO) const; - bool isLiteralConstant(const MachineOperand &MO) const; + bool isInlineConstant(const MachineOperand &MO, unsigned OpSize) const; + bool isLiteralConstant(const MachineOperand &MO, unsigned OpSize) const; bool isImmOperandLegal(const MachineInstr *MI, unsigned OpNo, const MachineOperand &MO) const; @@ -225,7 +225,8 @@ public: /// \brief Returns true if this operand uses the constant bus. bool usesConstantBus(const MachineRegisterInfo &MRI, - const MachineOperand &MO) const; + const MachineOperand &MO, + unsigned OpSize) const; /// \brief Return true if this instruction has any modifiers. /// e.g. src[012]_mod, omod, clamp. @@ -247,7 +248,20 @@ public: /// the register class of its machine operand. /// to infer the correct register class base on the other operands. const TargetRegisterClass *getOpRegClass(const MachineInstr &MI, - unsigned OpNo) const;\ + unsigned OpNo) const; + + /// \brief Return the size in bytes of the operand OpNo on the given + /// instruction opcode. + unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const { + const MCOperandInfo &OpInfo = get(Opcode).OpInfo[OpNo]; + return RI.getRegClass(OpInfo.RegClass)->getSize(); + } + + /// \brief This form should usually be preferred since it handles operands + /// with unknown register classes. + unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const { + return getOpRegClass(MI, OpNo)->getSize(); + } /// \returns true if it is legal for the operand at index \p OpNo /// to read a VGPR. 
diff --git a/llvm/lib/Target/R600/SIShrinkInstructions.cpp b/llvm/lib/Target/R600/SIShrinkInstructions.cpp index 6a3410688fe7..97bbd78d621f 100644 --- a/llvm/lib/Target/R600/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/R600/SIShrinkInstructions.cpp @@ -127,30 +127,31 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, TII->isVOPC(MI.getOpcode())); const SIRegisterInfo &TRI = TII->getRegisterInfo(); - MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); + int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); + MachineOperand &Src0 = MI.getOperand(Src0Idx); // Only one literal constant is allowed per instruction, so if src0 is a // literal constant then we can't do any folding. - if (Src0->isImm() && TII->isLiteralConstant(*Src0)) + if (Src0.isImm() && + TII->isLiteralConstant(Src0, TII->getOpSize(MI, Src0Idx))) return; - // Literal constants and SGPRs can only be used in Src0, so if Src0 is an // SGPR, we cannot commute the instruction, so we can't fold any literal // constants. 
- if (Src0->isReg() && !isVGPR(Src0, TRI, MRI)) + if (Src0.isReg() && !isVGPR(&Src0, TRI, MRI)) return; // Try to fold Src0 - if (Src0->isReg()) { - unsigned Reg = Src0->getReg(); + if (Src0.isReg()) { + unsigned Reg = Src0.getReg(); MachineInstr *Def = MRI.getUniqueVRegDef(Reg); if (Def && Def->isMoveImmediate()) { MachineOperand &MovSrc = Def->getOperand(1); bool ConstantFolded = false; if (MovSrc.isImm() && isUInt<32>(MovSrc.getImm())) { - Src0->ChangeToImmediate(MovSrc.getImm()); + Src0.ChangeToImmediate(MovSrc.getImm()); ConstantFolded = true; } if (ConstantFolded) { @@ -189,7 +190,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { const MachineOperand &Src = MI.getOperand(1); if (Src.isImm()) { - if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src)) + if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4)) MI.setDesc(TII->get(AMDGPU::S_MOVK_I32)); } diff --git a/llvm/test/CodeGen/R600/and.ll b/llvm/test/CodeGen/R600/and.ll index 7a395ccb38d0..bb7cba399343 100644 --- a/llvm/test/CodeGen/R600/and.ll +++ b/llvm/test/CodeGen/R600/and.ll @@ -64,8 +64,8 @@ define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addr ret void } -; FUNC-LABEL: {{^}}v_and_constant_i32: -; SI: v_and_b32 +; FUNC-LABEL: {{^}}v_and_constant_i32 +; SI: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, v{{[0-9]+}} define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) { %a = load i32 addrspace(1)* %aptr, align 4 %and = and i32 %a, 1234567 @@ -73,7 +73,25 @@ define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) ret void } -; FUNC-LABEL: {{^}}s_and_i64: +; FUNC-LABEL: {{^}}v_and_inline_imm_64_i32 +; SI: v_and_b32_e32 v{{[0-9]+}}, 64, v{{[0-9]+}} +define void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) { + %a = load i32 addrspace(1)* %aptr, align 4 + %and = and i32 %a, 64 + store i32 %and, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: 
{{^}}v_and_inline_imm_neg_16_i32 +; SI: v_and_b32_e32 v{{[0-9]+}}, -16, v{{[0-9]+}} +define void @v_and_inline_imm_neg_16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) { + %a = load i32 addrspace(1)* %aptr, align 4 + %and = and i32 %a, -16 + store i32 %and, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}s_and_i64 ; SI: s_and_b64 define void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) { %and = and i64 %a, %b @@ -90,8 +108,8 @@ define void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) { ret void } -; FUNC-LABEL: {{^}}s_and_constant_i64: -; SI: s_and_b64 +; FUNC-LABEL: {{^}}s_and_constant_i64 +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} define void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) { %and = and i64 %a, 281474976710655 store i64 %and, i64 addrspace(1)* %out, align 8 @@ -150,10 +168,129 @@ define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %apt ret void } -; FUNC-LABEL: {{^}}s_and_inline_imm_i64: +; FUNC-LABEL: {{^}}s_and_inline_imm_64_i64 ; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 64 -define void @s_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { +define void @s_and_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { %and = and i64 %a, 64 store i64 %and, i64 addrspace(1)* %out, align 8 ret void } + +; FUNC-LABEL: {{^}}s_and_inline_imm_1_i64 +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 1 +define void @s_and_inline_imm_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 1 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}s_and_inline_imm_1.0_i64 +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 1.0 +define void @s_and_inline_imm_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 4607182418800017408 + store i64 %and, i64 addrspace(1)* 
%out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}s_and_inline_imm_neg_1.0_i64 +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -1.0 +define void @s_and_inline_imm_neg_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 13830554455654793216 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}s_and_inline_imm_0.5_i64 +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0.5 +define void @s_and_inline_imm_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 4602678819172646912 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}s_and_inline_imm_neg_0.5_i64 +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -0.5 +define void @s_and_inline_imm_neg_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 13826050856027422720 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}s_and_inline_imm_2.0_i64 +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 2.0 +define void @s_and_inline_imm_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 4611686018427387904 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}s_and_inline_imm_neg_2.0_i64 +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -2.0 +define void @s_and_inline_imm_neg_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 13835058055282163712 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}s_and_inline_imm_4.0_i64 +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 4.0 +define void @s_and_inline_imm_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 4616189618054758400 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: 
{{^}}s_and_inline_imm_neg_4.0_i64 +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -4.0 +define void @s_and_inline_imm_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 13839561654909534208 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + + +; Test with the 64-bit integer bitpattern for a 32-bit float in the +; low 32-bits, which is not a valid 64-bit inline immediate. + +; FUNC-LABEL: {{^}}s_and_inline_imm_f32_4.0_i64 +; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 4.0 +; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0{{$}} +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}} +define void @s_and_inline_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 1082130432 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FIXME: Copy of -1 register +; FUNC-LABEL: {{^}}s_and_inline_imm_f32_neg_4.0_i64 +; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], -4.0 +; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], -1{{$}} +; SI-DAG: s_mov_b32 s[[K_HI_COPY:[0-9]+]], s[[K_HI]] +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI_COPY]]{{\]}} +define void @s_and_inline_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, -1065353216 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; Shift into upper 32-bits +; FUNC-LABEL: {{^}}s_and_inline_high_imm_f32_4.0_i64 +; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 4.0 +; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}} +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}} +define void @s_and_inline_high_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 4647714815446351872 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}s_and_inline_high_imm_f32_neg_4.0_i64 +; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 
-4.0 +; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}} +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}} +define void @s_and_inline_high_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 13871086852301127680 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} diff --git a/llvm/test/CodeGen/R600/imm.ll b/llvm/test/CodeGen/R600/imm.ll index c8fd8b72091a..9b95fd66b9d5 100644 --- a/llvm/test/CodeGen/R600/imm.ll +++ b/llvm/test/CodeGen/R600/imm.ll @@ -23,7 +23,26 @@ entry: ret void } -; CHECK-LABEL: {{^}}store_inline_imm_0.0_f32 +; CHECK-LABEL: {{^}}store_imm_neg_0.0_i64: +; CHECK-DAG: s_mov_b32 s[[HI_SREG:[0-9]+]], 0x80000000 +; CHECK-DAG: s_mov_b32 s[[LO_SREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]] +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]] +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_imm_neg_0.0_i64(i64 addrspace(1) *%out) { + store i64 -9223372036854775808, i64 addrspace(1) *%out + ret void +} + +; CHECK-LABEL: {{^}}store_inline_imm_neg_0.0_i32: +; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000 +; CHECK-NEXT: buffer_store_dword [[REG]] +define void @store_inline_imm_neg_0.0_i32(i32 addrspace(1)* %out) { + store i32 -2147483648, i32 addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}store_inline_imm_0.0_f32: ; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}} ; CHECK: buffer_store_dword [[REG]] define void @store_inline_imm_0.0_f32(float addrspace(1)* %out) { @@ -31,7 +50,7 @@ define void @store_inline_imm_0.0_f32(float addrspace(1)* %out) { ret void } -; CHECK-LABEL: {{^}}store_imm_neg_0.0_f32 +; CHECK-LABEL: {{^}}store_imm_neg_0.0_f32: ; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000 ; CHECK: buffer_store_dword [[REG]] define void @store_imm_neg_0.0_f32(float addrspace(1)* %out) { @@ -39,7 +58,7 @@ define void @store_imm_neg_0.0_f32(float addrspace(1)* %out) { 
ret void } -; CHECK-LABEL: {{^}}store_inline_imm_0.5_f32 +; CHECK-LABEL: {{^}}store_inline_imm_0.5_f32: ; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0.5{{$}} ; CHECK: buffer_store_dword [[REG]] define void @store_inline_imm_0.5_f32(float addrspace(1)* %out) { @@ -47,7 +66,7 @@ define void @store_inline_imm_0.5_f32(float addrspace(1)* %out) { ret void } -; CHECK-LABEL: {{^}}store_inline_imm_m_0.5_f32 +; CHECK-LABEL: {{^}}store_inline_imm_m_0.5_f32: ; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -0.5{{$}} ; CHECK: buffer_store_dword [[REG]] define void @store_inline_imm_m_0.5_f32(float addrspace(1)* %out) { @@ -55,7 +74,7 @@ define void @store_inline_imm_m_0.5_f32(float addrspace(1)* %out) { ret void } -; CHECK-LABEL: {{^}}store_inline_imm_1.0_f32 +; CHECK-LABEL: {{^}}store_inline_imm_1.0_f32: ; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0{{$}} ; CHECK: buffer_store_dword [[REG]] define void @store_inline_imm_1.0_f32(float addrspace(1)* %out) { @@ -63,7 +82,7 @@ define void @store_inline_imm_1.0_f32(float addrspace(1)* %out) { ret void } -; CHECK-LABEL: {{^}}store_inline_imm_m_1.0_f32 +; CHECK-LABEL: {{^}}store_inline_imm_m_1.0_f32: ; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -1.0{{$}} ; CHECK: buffer_store_dword [[REG]] define void @store_inline_imm_m_1.0_f32(float addrspace(1)* %out) { @@ -71,7 +90,7 @@ define void @store_inline_imm_m_1.0_f32(float addrspace(1)* %out) { ret void } -; CHECK-LABEL: {{^}}store_inline_imm_2.0_f32 +; CHECK-LABEL: {{^}}store_inline_imm_2.0_f32: ; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0{{$}} ; CHECK: buffer_store_dword [[REG]] define void @store_inline_imm_2.0_f32(float addrspace(1)* %out) { @@ -79,7 +98,7 @@ define void @store_inline_imm_2.0_f32(float addrspace(1)* %out) { ret void } -; CHECK-LABEL: {{^}}store_inline_imm_m_2.0_f32 +; CHECK-LABEL: {{^}}store_inline_imm_m_2.0_f32: ; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -2.0{{$}} ; CHECK: buffer_store_dword [[REG]] define void @store_inline_imm_m_2.0_f32(float addrspace(1)* %out) { @@ -87,7 +106,7 @@ 
define void @store_inline_imm_m_2.0_f32(float addrspace(1)* %out) { ret void } -; CHECK-LABEL: {{^}}store_inline_imm_4.0_f32 +; CHECK-LABEL: {{^}}store_inline_imm_4.0_f32: ; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 4.0{{$}} ; CHECK: buffer_store_dword [[REG]] define void @store_inline_imm_4.0_f32(float addrspace(1)* %out) { @@ -95,7 +114,7 @@ define void @store_inline_imm_4.0_f32(float addrspace(1)* %out) { ret void } -; CHECK-LABEL: {{^}}store_inline_imm_m_4.0_f32 +; CHECK-LABEL: {{^}}store_inline_imm_m_4.0_f32: ; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -4.0{{$}} ; CHECK: buffer_store_dword [[REG]] define void @store_inline_imm_m_4.0_f32(float addrspace(1)* %out) { @@ -111,7 +130,7 @@ define void @store_literal_imm_f32(float addrspace(1)* %out) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_0.0_f32 +; CHECK-LABEL: {{^}}add_inline_imm_0.0_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0, [[VAL]]{{$}} ; CHECK: buffer_store_dword [[REG]] @@ -121,7 +140,7 @@ define void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_0.5_f32 +; CHECK-LABEL: {{^}}add_inline_imm_0.5_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0.5, [[VAL]]{{$}} ; CHECK: buffer_store_dword [[REG]] @@ -131,7 +150,7 @@ define void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f32 +; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -0.5, [[VAL]]{{$}} ; CHECK: buffer_store_dword [[REG]] @@ -141,7 +160,7 @@ define void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_1.0_f32 +; CHECK-LABEL: {{^}}add_inline_imm_1.0_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 1.0, [[VAL]]{{$}} ; CHECK: buffer_store_dword [[REG]] @@ -151,7 +170,7 @@ 
define void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f32 +; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -1.0, [[VAL]]{{$}} ; CHECK: buffer_store_dword [[REG]] @@ -161,7 +180,7 @@ define void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_2.0_f32 +; CHECK-LABEL: {{^}}add_inline_imm_2.0_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, [[VAL]]{{$}} ; CHECK: buffer_store_dword [[REG]] @@ -171,7 +190,7 @@ define void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f32 +; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -2.0, [[VAL]]{{$}} ; CHECK: buffer_store_dword [[REG]] @@ -181,7 +200,7 @@ define void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_4.0_f32 +; CHECK-LABEL: {{^}}add_inline_imm_4.0_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 4.0, [[VAL]]{{$}} ; CHECK: buffer_store_dword [[REG]] @@ -191,7 +210,7 @@ define void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f32 +; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -4.0, [[VAL]]{{$}} ; CHECK: buffer_store_dword [[REG]] @@ -201,7 +220,7 @@ define void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) { ret void } -; CHECK-LABEL: @commute_add_inline_imm_0.5_f32 +; CHECK-LABEL: {{^}}commute_add_inline_imm_0.5_f32: ; CHECK: buffer_load_dword [[VAL:v[0-9]+]] ; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0.5, [[VAL]] ; CHECK: buffer_store_dword [[REG]] @@ -212,7 
+231,7 @@ define void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addr ret void } -; CHECK-LABEL: @commute_add_literal_f32 +; CHECK-LABEL: {{^}}commute_add_literal_f32: ; CHECK: buffer_load_dword [[VAL:v[0-9]+]] ; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0x44800000, [[VAL]] ; CHECK: buffer_store_dword [[REG]] @@ -223,7 +242,7 @@ define void @commute_add_literal_f32(float addrspace(1)* %out, float addrspace(1 ret void } -; CHECK-LABEL: {{^}}add_inline_imm_1_f32 +; CHECK-LABEL: {{^}}add_inline_imm_1_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 1, [[VAL]]{{$}} ; CHECK: buffer_store_dword [[REG]] @@ -233,7 +252,7 @@ define void @add_inline_imm_1_f32(float addrspace(1)* %out, float %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_2_f32 +; CHECK-LABEL: {{^}}add_inline_imm_2_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 2, [[VAL]]{{$}} ; CHECK: buffer_store_dword [[REG]] @@ -243,7 +262,7 @@ define void @add_inline_imm_2_f32(float addrspace(1)* %out, float %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_16_f32 +; CHECK-LABEL: {{^}}add_inline_imm_16_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 16, [[VAL]] ; CHECK: buffer_store_dword [[REG]] @@ -253,7 +272,7 @@ define void @add_inline_imm_16_f32(float addrspace(1)* %out, float %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f32 +; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -1, [[VAL]] ; CHECK: buffer_store_dword [[REG]] @@ -263,7 +282,7 @@ define void @add_inline_imm_neg_1_f32(float addrspace(1)* %out, float %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f32 +; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -2, [[VAL]] ; CHECK: buffer_store_dword [[REG]] @@ -273,7 +292,7 @@ define void 
@add_inline_imm_neg_2_f32(float addrspace(1)* %out, float %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f32 +; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -16, [[VAL]] ; CHECK: buffer_store_dword [[REG]] @@ -283,7 +302,7 @@ define void @add_inline_imm_neg_16_f32(float addrspace(1)* %out, float %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_63_f32 +; CHECK-LABEL: {{^}}add_inline_imm_63_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 63, [[VAL]] ; CHECK: buffer_store_dword [[REG]] @@ -293,7 +312,7 @@ define void @add_inline_imm_63_f32(float addrspace(1)* %out, float %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_64_f32 +; CHECK-LABEL: {{^}}add_inline_imm_64_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 64, [[VAL]] ; CHECK: buffer_store_dword [[REG]] @@ -303,7 +322,8 @@ define void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_0.0_f64 + +; CHECK-LABEL: {{^}}add_inline_imm_0.0_f64: ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0, [[VAL]] @@ -314,7 +334,7 @@ define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_0.5_f64 +; CHECK-LABEL: {{^}}add_inline_imm_0.5_f64: ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0.5, [[VAL]] @@ -325,7 +345,7 @@ define void @add_inline_imm_0.5_f64(double addrspace(1)* %out, double %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f64 +; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f64: ; SI: s_load_dwordx2 
[[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -0.5, [[VAL]] @@ -336,7 +356,7 @@ define void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, double %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_1.0_f64 +; CHECK-LABEL: {{^}}add_inline_imm_1.0_f64: ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1.0, [[VAL]] @@ -347,7 +367,7 @@ define void @add_inline_imm_1.0_f64(double addrspace(1)* %out, double %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f64 +; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f64: ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1.0, [[VAL]] @@ -358,7 +378,7 @@ define void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, double %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_2.0_f64 +; CHECK-LABEL: {{^}}add_inline_imm_2.0_f64: ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2.0, [[VAL]] @@ -369,7 +389,7 @@ define void @add_inline_imm_2.0_f64(double addrspace(1)* %out, double %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f64 +; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f64: ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2.0, [[VAL]] @@ -380,7 +400,7 @@ define void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, double %x) { ret void } -; CHECK-LABEL: 
{{^}}add_inline_imm_4.0_f64 +; CHECK-LABEL: {{^}}add_inline_imm_4.0_f64: ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 4.0, [[VAL]] @@ -391,7 +411,7 @@ define void @add_inline_imm_4.0_f64(double addrspace(1)* %out, double %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f64 +; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f64: ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -4.0, [[VAL]] @@ -403,7 +423,7 @@ define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) { } -; CHECK-LABEL: {{^}}add_inline_imm_1_f64 +; CHECK-LABEL: {{^}}add_inline_imm_1_f64: ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1, [[VAL]] @@ -414,7 +434,7 @@ define void @add_inline_imm_1_f64(double addrspace(1)* %out, double %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_2_f64 +; CHECK-LABEL: {{^}}add_inline_imm_2_f64: ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2, [[VAL]] @@ -425,7 +445,7 @@ define void @add_inline_imm_2_f64(double addrspace(1)* %out, double %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_16_f64 +; CHECK-LABEL: {{^}}add_inline_imm_16_f64: ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 16, [[VAL]] @@ -436,7 +456,7 @@ define void @add_inline_imm_16_f64(double addrspace(1)* %out, 
double %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f64 +; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f64: ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1, [[VAL]] @@ -447,7 +467,7 @@ define void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, double %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f64 +; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f64: ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2, [[VAL]] @@ -458,7 +478,7 @@ define void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, double %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f64 +; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f64: ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -16, [[VAL]] @@ -469,7 +489,7 @@ define void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, double %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_63_f64 +; CHECK-LABEL: {{^}}add_inline_imm_63_f64: ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 63, [[VAL]] @@ -480,7 +500,7 @@ define void @add_inline_imm_63_f64(double addrspace(1)* %out, double %x) { ret void } -; CHECK-LABEL: {{^}}add_inline_imm_64_f64 +; CHECK-LABEL: {{^}}add_inline_imm_64_f64: ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 64, [[VAL]] @@ -492,7 
+512,7 @@ define void @add_inline_imm_64_f64(double addrspace(1)* %out, double %x) { } -; CHECK-LABEL: {{^}}store_inline_imm_0.0_f64 +; CHECK-LABEL: {{^}}store_inline_imm_0.0_f64: ; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0 ; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0 ; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} @@ -500,3 +520,98 @@ define void @store_inline_imm_0.0_f64(double addrspace(1)* %out) { store double 0.0, double addrspace(1)* %out ret void } + + +; CHECK-LABEL: {{^}}store_literal_imm_neg_0.0_f64: +; CHECK-DAG: s_mov_b32 s[[HI_SREG:[0-9]+]], 0x80000000 +; CHECK-DAG: s_mov_b32 s[[LO_SREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]] +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]] +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_literal_imm_neg_0.0_f64(double addrspace(1)* %out) { + store double -0.0, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}store_inline_imm_0.5_f64: +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3fe00000 +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_inline_imm_0.5_f64(double addrspace(1)* %out) { + store double 0.5, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}store_inline_imm_m_0.5_f64: +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbfe00000 +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_inline_imm_m_0.5_f64(double addrspace(1)* %out) { + store double -0.5, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}store_inline_imm_1.0_f64: +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3ff00000 +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_inline_imm_1.0_f64(double addrspace(1)* %out) { 
+ store double 1.0, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}store_inline_imm_m_1.0_f64: +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbff00000 +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_inline_imm_m_1.0_f64(double addrspace(1)* %out) { + store double -1.0, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}store_inline_imm_2.0_f64: +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 2.0 +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_inline_imm_2.0_f64(double addrspace(1)* %out) { + store double 2.0, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}store_inline_imm_m_2.0_f64: +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], -2.0 +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_inline_imm_m_2.0_f64(double addrspace(1)* %out) { + store double -2.0, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}store_inline_imm_4.0_f64: +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x40100000 +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_inline_imm_4.0_f64(double addrspace(1)* %out) { + store double 4.0, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}store_inline_imm_m_4.0_f64: +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xc0100000 +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_inline_imm_m_4.0_f64(double addrspace(1)* %out) { + store double -4.0, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}store_literal_imm_f64: +; CHECK-DAG: s_mov_b32 s[[HI_SREG:[0-9]+]], 0x40b00000 +; CHECK-DAG: s_mov_b32 
s[[LO_SREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]] +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]] +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_literal_imm_f64(double addrspace(1)* %out) { + store double 4096.0, double addrspace(1)* %out + ret void +} diff --git a/llvm/test/CodeGen/R600/select64.ll b/llvm/test/CodeGen/R600/select64.ll index 3fd648139fe2..0245dae56b11 100644 --- a/llvm/test/CodeGen/R600/select64.ll +++ b/llvm/test/CodeGen/R600/select64.ll @@ -49,3 +49,20 @@ define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspa store i32 %trunc, i32 addrspace(1)* %out, align 4 ret void } + +; CHECK-LABEL: {{^}}v_select_i64_split_imm: +; CHECK: s_mov_b32 [[SHI:s[0-9]+]], 63 +; CHECK: s_mov_b32 [[SLO:s[0-9]+]], 0 +; CHECK-DAG: v_mov_b32_e32 [[VHI:v[0-9]+]], [[SHI]] +; CHECK-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], [[SLO]] +; CHECK-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, [[VLO]], {{v[0-9]+}} +; CHECK-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, [[VHI]], {{v[0-9]+}} +; CHECK: s_endpgm +define void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { + %cmp = icmp ugt i32 %cond, 5 + %a = load i64 addrspace(1)* %aptr, align 8 + %b = load i64 addrspace(1)* %bptr, align 8 + %sel = select i1 %cmp, i64 %a, i64 270582939648 ; 63 << 32 + store i64 %sel, i64 addrspace(1)* %out, align 8 + ret void +} diff --git a/llvm/test/CodeGen/R600/sint_to_fp.f64.ll b/llvm/test/CodeGen/R600/sint_to_fp.f64.ll index 77844a6aa384..893cfb337582 100644 --- a/llvm/test/CodeGen/R600/sint_to_fp.f64.ll +++ b/llvm/test/CodeGen/R600/sint_to_fp.f64.ll @@ -10,12 +10,13 @@ define void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) { ret void } +; FIXME: select on 0, 0 ; SI-LABEL: {{^}}sint_to_fp_i1_f64: ; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]], ; We can't fold the SGPRs into v_cndmask_b32_e64, because it already ; uses 
an SGPR for [[CMP]] ; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]] -; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]] +; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, [[CMP]] ; SI: buffer_store_dwordx2 ; SI: s_endpgm define void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) { diff --git a/llvm/test/CodeGen/R600/uint_to_fp.f64.ll b/llvm/test/CodeGen/R600/uint_to_fp.f64.ll index 09e987dd14da..f7152430b5c4 100644 --- a/llvm/test/CodeGen/R600/uint_to_fp.f64.ll +++ b/llvm/test/CodeGen/R600/uint_to_fp.f64.ll @@ -70,12 +70,13 @@ define void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i ret void } +; FIXME: select on 0, 0 ; SI-LABEL: {{^}}uint_to_fp_i1_to_f64: ; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]], ; We can't fold the SGPRs into v_cndmask_b32_e64, because it already ; uses an SGPR for [[CMP]] ; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]] -; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]] +; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, [[CMP]] ; SI: buffer_store_dwordx2 ; SI: s_endpgm define void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 %in) {