From 6604d81e1bb349597e8c1b919303a2efb4c9ba19 Mon Sep 17 00:00:00 2001 From: Thomas Symalla Date: Wed, 13 Jan 2021 15:23:45 +0100 Subject: [PATCH] Added and used new target pseudo for v_cvt_pk_i16_i32, changes due to code review. --- llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 2 + llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td | 2 + .../AMDGPU/AMDGPUPreLegalizerCombiner.cpp | 49 ++++++++----------- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 1 + llvm/lib/Target/AMDGPU/SIInstructions.td | 6 +++ .../AMDGPU/GlobalISel/combine-short-clamp.ll | 2 +- 6 files changed, 32 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index bba03736d01a..d1e23e1c3f44 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -174,6 +174,8 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; + def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 894677ec68b6..c0cb1781abe3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -213,6 +213,8 @@ def AMDGPUcvt_f32_ubyte2 : SDNode<"AMDGPUISD::CVT_F32_UBYTE2", def AMDGPUcvt_f32_ubyte3 : SDNode<"AMDGPUISD::CVT_F32_UBYTE3", SDTIntToFPOp, []>; +def AMDGPUcvt_pk_i16_i32 : SDNode<"AMDGPUISD::CVT_PK_I16_I32", + AMDGPUIntPackOp, []>; // urecip - This operation is a helper for integer division, it returns the // result of 1 / a as a fractional unsigned integer. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp index 7309a0c91a5e..f70fadbcb5f3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp @@ -11,10 +11,13 @@ // //===----------------------------------------------------------------------===// +<<<<<<< HEAD <<<<<<< HEAD #include "AMDGPU.h" ======= #include "AMDGPULegalizerInfo.h" +======= +>>>>>>> Added and used new target pseudo for v_cvt_pk_i16_i32, changes due to code review. #include "AMDGPUTargetMachine.h" >>>>>>> Move Combiner to PreLegalize step #include "llvm/CodeGen/GlobalISel/Combiner.h" @@ -70,8 +73,6 @@ bool AMDGPUPreLegalizerCombinerHelper::matchClampI64ToI16( if (DstType != LLT::scalar(16)) return false; - LLVM_DEBUG(dbgs() << "Matching Clamp i64 to i16\n"); - Register Base; // Try to match a combination of min / max MIR opcodes. @@ -128,38 +129,33 @@ void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16( MRI.setRegClass(Hi32, &AMDGPU::VGPR_32RegClass); MRI.setRegClass(Lo32, &AMDGPU::VGPR_32RegClass); - constexpr unsigned int CvtOpcode = AMDGPU::V_CVT_PK_I16_I32_e64; - assert(MI.getOpcode() != CvtOpcode); + assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32); - const auto REG_CLASS = &AMDGPU::VGPR_32RegClass; + Register CvtDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + const LLT V2S16 = LLT::vector(2, 16); + MRI.setType(CvtDst, V2S16); - Register CvtDst = MRI.createVirtualRegister(REG_CLASS); - MRI.setType(CvtDst, S32); - - auto CvtPk = B.buildInstr(CvtOpcode); - CvtPk.addDef(CvtDst); - CvtPk.addReg(Hi32); - CvtPk.addReg(Lo32); - CvtPk.setMIFlags(MI.getFlags()); + B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, + {CvtDst}, + {Hi32, Lo32}, + MI.getFlags()); auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2); auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2); auto MinBoundaryDst = B.buildConstant(S32, MinBoundary); - MRI.setRegClass(MinBoundaryDst.getReg(0), REG_CLASS); + MRI.setRegClass(MinBoundaryDst.getReg(0), &AMDGPU::VGPR_32RegClass); auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary); - MRI.setRegClass(MaxBoundaryDst.getReg(0), REG_CLASS); + MRI.setRegClass(MaxBoundaryDst.getReg(0), &AMDGPU::VGPR_32RegClass); - Register MedDst = MRI.createVirtualRegister(REG_CLASS); + Register MedDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); MRI.setType(MedDst, S32); - auto Med = B.buildInstr(AMDGPU::V_MED3_I32); - Med.addDef(MedDst); - Med.addReg(MinBoundaryDst.getReg(0)); - Med.addReg(CvtDst); - Med.addReg(MaxBoundaryDst.getReg(0)); - Med.setMIFlags(MI.getFlags()); + B.buildInstr(AMDGPU::V_MED3_I32, + {MedDst}, + {MinBoundaryDst.getReg(0), CvtDst, MaxBoundaryDst.getReg(0)}, + MI.getFlags()); Register TruncDst = MRI.createGenericVirtualRegister(LLT::scalar(16)); B.buildTrunc(TruncDst, MedDst); @@ -197,10 +193,9 @@ public: AMDGPUGenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg; AMDGPUPreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize, - const AMDGPULegalizerInfo *LI, GISelKnownBits *KB, MachineDominatorTree *MDT) : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, - /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize), + /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize), KB(KB), MDT(MDT) { if (!GeneratedRuleCfg.parseCommandLineOption()) report_fatal_error("Invalid rule identifier"); @@ -282,16 +277,12 @@ bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { const Function &F = MF.getFunction(); bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); - - const GCNSubtarget &ST = MF.getSubtarget(); - const AMDGPULegalizerInfo *LI = - static_cast(ST.getLegalizerInfo()); GISelKnownBits *KB = &getAnalysis().get(MF); MachineDominatorTree *MDT = IsOptNone ? nullptr : &getAnalysis(); AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), - F.hasMinSize(), LI, KB, MDT); + F.hasMinSize(), KB, MDT); Combiner C(PCInfo, TPC); return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 502356d4f9a4..d63090f36148 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3621,6 +3621,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1: case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2: case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3: + case AMDGPU::G_AMDGPU_CVT_PK_I16_I32: return getDefaultMappingVOP(MI); case AMDGPU::G_UMULH: case AMDGPU::G_SMULH: { diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index ecb875debefd..59e3cad72fce 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2575,6 +2575,12 @@ def G_AMDGPU_CVT_F32_UBYTE#N : AMDGPUGenericInstruction { } } +def G_AMDGPU_CVT_PK_I16_I32 : AMDGPUGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src0, type0:$src1); + let hasSideEffects = 0; +} + // Atomic cmpxchg. $cmpval ad $newval are packed in a single vector // operand Expects a MachineMemOperand in addition to explicit // operands. diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll index e7d6634c29a5..90d4735b84b0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll @@ -109,4 +109,4 @@ entry: %min = call i64 @llvm.smin.i64(i64 %max, i64 0) %result = trunc i64 %min to i16 ret i16 %result -} \ No newline at end of file +}