diff --git a/llvm/lib/Target/R600/SIFoldOperands.cpp b/llvm/lib/Target/R600/SIFoldOperands.cpp index 545905ba64e1..655b3aaaa2b9 100644 --- a/llvm/lib/Target/R600/SIFoldOperands.cpp +++ b/llvm/lib/Target/R600/SIFoldOperands.cpp @@ -56,10 +56,16 @@ struct FoldCandidate { uint64_t ImmToFold; FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) : - UseMI(MI), UseOpNo(OpNo), OpToFold(FoldOp), ImmToFold(0) { } + UseMI(MI), UseOpNo(OpNo) { - FoldCandidate(MachineInstr *MI, unsigned OpNo, uint64_t Imm) : - UseMI(MI), UseOpNo(OpNo), OpToFold(nullptr), ImmToFold(Imm) { } + if (FoldOp->isImm()) { + OpToFold = nullptr; + ImmToFold = FoldOp->getImm(); + } else { + assert(FoldOp->isReg()); + OpToFold = FoldOp; + } + } bool isImm() const { return !OpToFold; @@ -119,6 +125,35 @@ static bool updateOperand(FoldCandidate &Fold, return false; } +static bool tryAddToFoldList(std::vector &FoldList, + MachineInstr *MI, unsigned OpNo, + MachineOperand *OpToFold, + const SIInstrInfo *TII) { + if (!TII->isOperandLegal(MI, OpNo, OpToFold)) { + // Operand is not legal, so try to commute the instruction to + // see if this makes it possible to fold. + unsigned CommuteIdx0; + unsigned CommuteIdx1; + bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1); + + if (CanCommute) { + if (CommuteIdx0 == OpNo) + OpNo = CommuteIdx1; + else if (CommuteIdx1 == OpNo) + OpNo = CommuteIdx0; + } + + if (!CanCommute || !TII->commuteInstruction(MI)) + return false; + + if (!TII->isOperandLegal(MI, OpNo, OpToFold)) + return false; + } + + FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold)); + return true; +} + bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); const SIInstrInfo *TII = @@ -140,6 +175,11 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { MachineOperand &OpToFold = MI.getOperand(1); bool FoldingImm = OpToFold.isImm() || OpToFold.isFPImm(); + // FIXME: We could also be folding things like FrameIndexes and + // TargetIndexes. + if (!FoldingImm && !OpToFold.isReg()) + continue; + // Folding immediates with more than one use will increase program side. // FIXME: This will also reduce register usage, which may be better // in some cases. A better heuristic is needed. @@ -210,24 +250,13 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1) continue; - if (FoldingImm) { - const MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue()); - if (TII->isOperandLegal(UseMI, Use.getOperandNo(), &ImmOp)) { - FoldList.push_back(FoldCandidate(UseMI, Use.getOperandNo(), - Imm.getSExtValue())); - } + MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue()); + tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII); continue; } - // Normal substitution with registers - if (TII->isOperandLegal(UseMI, Use.getOperandNo(), &OpToFold)) { - FoldList.push_back(FoldCandidate(UseMI, Use.getOperandNo(), &OpToFold)); - continue; - } - - // FIXME: We could commute the instruction to create more opportunites - // for folding. This will only be useful if we have 32-bit instructions. + tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII); // FIXME: We could try to change the instruction from 64-bit to 32-bit // to enable more folding opportunites. The shrink operands pass diff --git a/llvm/lib/Target/R600/SIInstrInfo.cpp b/llvm/lib/Target/R600/SIInstrInfo.cpp index 37e64e930651..743d1c65815d 100644 --- a/llvm/lib/Target/R600/SIInstrInfo.cpp +++ b/llvm/lib/Target/R600/SIInstrInfo.cpp @@ -709,6 +709,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { + if (MI->getNumOperands() < 3) return nullptr; @@ -730,8 +731,9 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, // Make sure it's legal to commute operands for VOP2. if (isVOP2(MI->getOpcode()) && (!isOperandLegal(MI, Src0Idx, &Src1) || - !isOperandLegal(MI, Src1Idx, &Src0))) + !isOperandLegal(MI, Src1Idx, &Src0))) { return nullptr; + } if (!Src1.isReg()) { // Allow commuting instructions with Imm or FPImm operands. @@ -1471,6 +1473,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx, // // s_sendmsg 0, s0 ; Operand defined as m0reg // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL + return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC; } diff --git a/llvm/test/CodeGen/R600/mulhu.ll b/llvm/test/CodeGen/R600/mulhu.ll index 146c92f8905b..865971712306 100644 --- a/llvm/test/CodeGen/R600/mulhu.ll +++ b/llvm/test/CodeGen/R600/mulhu.ll @@ -1,7 +1,7 @@ ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s ;CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0xaaaaaaab -;CHECK: v_mul_hi_u32 v0, {{[sv][0-9]+}}, {{v[0-9]+}} +;CHECK: v_mul_hi_u32 v0, {{v[0-9]+}}, {{s[0-9]+}} ;CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v0 define void @test(i32 %p) { diff --git a/llvm/test/CodeGen/R600/sdiv.ll b/llvm/test/CodeGen/R600/sdiv.ll index 8d4208cb585c..c635c0569e84 100644 --- a/llvm/test/CodeGen/R600/sdiv.ll +++ b/llvm/test/CodeGen/R600/sdiv.ll @@ -35,7 +35,7 @@ define void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { ; FUNC-LABEL: {{^}}slow_sdiv_i32_3435: ; SI: buffer_load_dword [[VAL:v[0-9]+]], ; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x98a1930b -; SI: v_mul_hi_i32 [[TMP:v[0-9]+]], [[VAL]], [[MAGIC]] +; SI: v_mul_hi_i32 [[TMP:v[0-9]+]], [[MAGIC]], [[VAL]] ; SI: v_add_i32 ; SI: v_lshrrev_b32 ; SI: v_ashrrev_i32 diff --git a/llvm/test/CodeGen/R600/use-sgpr-multiple-times.ll b/llvm/test/CodeGen/R600/use-sgpr-multiple-times.ll index 2c6ae1eae67b..97d73ba74bc5 100644 --- a/llvm/test/CodeGen/R600/use-sgpr-multiple-times.ll +++ b/llvm/test/CodeGen/R600/use-sgpr-multiple-times.ll @@ -41,7 +41,7 @@ define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, floa ; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc ; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]] -; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]] +; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]] ; SI: buffer_store_dword [[RESULT]] define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 { %fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1 @@ -53,7 +53,7 @@ define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, floa ; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc ; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]] -; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]] +; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]] ; SI: buffer_store_dword [[RESULT]] define void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 { %fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1