From 26e1ebd3ea2cc37a533f16f008d4b05e50a6a5dc Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 20 Jan 2022 09:38:24 +0000 Subject: [PATCH] [GlobalISel] Change ConstantFoldVectorBinop to return vector of APInt Previously it built MIR for the results and returned a Register. This avoids building constants for earlier elements of the vector if later elements will fail to fold, and allows CSEMIRBuilder::buildInstr to avoid unconditionally building a copy from the result. Use a new helper function MachineIRBuilder::buildBuildVectorConstant to build a G_BUILD_VECTOR of G_CONSTANTs. Differential Revision: https://reviews.llvm.org/D117758 --- .../CodeGen/GlobalISel/MachineIRBuilder.h | 5 ++++ llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 11 +++----- llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp | 8 +++--- .../CodeGen/GlobalISel/MachineIRBuilder.cpp | 11 ++++++++ llvm/lib/CodeGen/GlobalISel/Utils.cpp | 26 +++++++------------ .../irtranslator-constant-fold-vector-op.ll | 3 +-- .../GlobalISel/irtranslator-getelementptr.ll | 7 +++-- 7 files changed, 38 insertions(+), 33 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index e596bb6996cc..7fe9252824c7 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -1008,6 +1008,11 @@ public: MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef<Register> Ops); + /// Build and insert \p Res = G_BUILD_VECTOR \p Op0, ... where each OpN is + /// built with G_CONSTANT. 
+ MachineInstrBuilder buildBuildVectorConstant(const DstOp &Res, + ArrayRef<APInt> Ops); + /// Build and insert \p Res = G_BUILD_VECTOR with \p Src replicated to fill /// the number of elements MachineInstrBuilder buildSplatVector(const DstOp &Res, diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index f0030f68470f..7c1c89d8d6a9 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -269,13 +269,10 @@ Optional<APFloat> ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const MachineRegisterInfo &MRI); /// Tries to constant fold a vector binop with sources \p Op1 and \p Op2. -/// If successful, returns the G_BUILD_VECTOR representing the folded vector -/// constant. \p MIB should have an insertion point already set to create new -/// G_CONSTANT instructions as needed. -Register ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, - const Register Op2, - const MachineRegisterInfo &MRI, - MachineIRBuilder &MIB); +/// Returns an empty vector on failure. +SmallVector<APInt> ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, + const Register Op2, + const MachineRegisterInfo &MRI); Optional<APInt> ConstantFoldExtOp(unsigned Opcode, const Register Op1, uint64_t Imm, const MachineRegisterInfo &MRI); diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index aec74d2f9c16..a432e4ed7fb7 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -203,10 +203,10 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc, if (SrcTy.isVector()) { // Try to constant fold vector constants. 
- Register VecCst = ConstantFoldVectorBinop( - Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI(), *this); - if (VecCst) - return buildCopy(DstOps[0], VecCst); + SmallVector<APInt> VecCst = ConstantFoldVectorBinop( + Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI()); + if (!VecCst.empty()) + return buildBuildVectorConstant(DstOps[0], VecCst); break; } diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index b180b8770f86..b2c5f310cd46 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -664,6 +664,17 @@ MachineInstrBuilder MachineIRBuilder::buildBuildVector(const DstOp &Res, return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); } +MachineInstrBuilder +MachineIRBuilder::buildBuildVectorConstant(const DstOp &Res, + ArrayRef<APInt> Ops) { + SmallVector<SrcOp> TmpVec; + TmpVec.reserve(Ops.size()); + LLT EltTy = Res.getLLTTy(*getMRI()).getElementType(); + for (auto &Op : Ops) + TmpVec.push_back(buildConstant(EltTy, Op)); + return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); +} + MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res, const SrcOp &Src) { SmallVector<SrcOp, 8> TmpVec(Res.getLLTTy(*getMRI()).getNumElements(), Src); diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 1544268be9fc..cf80350f17de 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -608,33 +608,27 @@ Optional<APFloat> llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, return None; } -Register llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, - const Register Op2, - const MachineRegisterInfo &MRI, - MachineIRBuilder &MIB) { +SmallVector<APInt> +llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, + const Register Op2, + const MachineRegisterInfo &MRI) { auto *SrcVec2 = getOpcodeDef<GBuildVector>(Op2, MRI); if (!SrcVec2) - return Register(); + return 
SmallVector<APInt>(); auto *SrcVec1 = getOpcodeDef<GBuildVector>(Op1, MRI); if (!SrcVec1) - return Register(); + return SmallVector<APInt>(); - const LLT EltTy = MRI.getType(SrcVec1->getSourceReg(0)); - - SmallVector<Register, 16> FoldedElements; + SmallVector<APInt> FoldedElements; for (unsigned Idx = 0, E = SrcVec1->getNumSources(); Idx < E; ++Idx) { auto MaybeCst = ConstantFoldBinOp(Opcode, SrcVec1->getSourceReg(Idx), SrcVec2->getSourceReg(Idx), MRI); if (!MaybeCst) - return Register(); - auto FoldedCstReg = MIB.buildConstant(EltTy, *MaybeCst).getReg(0); - FoldedElements.emplace_back(FoldedCstReg); + return SmallVector<APInt>(); + FoldedElements.push_back(*MaybeCst); } - // Create the new vector constant. - auto CstVec = - MIB.buildBuildVector(MRI.getType(SrcVec1->getReg(0)), FoldedElements); - return CstVec.getReg(0); + return FoldedElements; } bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll index 9ee4c4b000ab..44bf8ce235d6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll @@ -12,8 +12,7 @@ define amdgpu_kernel void @constant_fold_vector_add() { ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64), [[C2]](s64), [[C2]](s64) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY [[BUILD_VECTOR1]](<4 x s64>) - ; CHECK-NEXT: G_STORE [[COPY]](<4 x s64>), [[C1]](p1) :: (store (<4 x s64>) into `<4 x i64> addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s64>), [[C1]](p1) :: (store (<4 x s64>) into `<4 x i64> addrspace(1)* null`, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 entry: %add = add <4 x i64> zeroinitializer, zeroinitializer diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll index aa9926b18e63..2e660884e7db 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll @@ -184,10 +184,9 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64_constant(<2 x i32 ad ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C3]](s64) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s64>) = COPY [[BUILD_VECTOR4]](<2 x s64>) - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[COPY8]](<2 x s64>) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY9]](<2 x p1>) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR4]](<2 x s64>) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY8]](<2 x p1>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)