[GlobalISel] Change ConstantFoldVectorBinop to return vector of APInt

Previously it built MIR for the results and returned a Register.

This avoids building constants for earlier elements of the vector if
later elements will fail to fold, and allows CSEMIRBuilder::buildInstr
to avoid unconditionally building a copy from the result.

Use a new helper function MachineIRBuilder::buildBuildVectorConstant
to build a G_BUILD_VECTOR of G_CONSTANTs.

Differential Revision: https://reviews.llvm.org/D117758
This commit is contained in:
Jay Foad 2022-01-20 09:38:24 +00:00
parent 84b5f7c38c
commit 26e1ebd3ea
7 changed files with 38 additions and 33 deletions

View File

@ -1008,6 +1008,11 @@ public:
MachineInstrBuilder buildBuildVector(const DstOp &Res, MachineInstrBuilder buildBuildVector(const DstOp &Res,
ArrayRef<Register> Ops); ArrayRef<Register> Ops);
/// Build and insert \p Res = G_BUILD_VECTOR \p Op0, ... where each OpN is
/// built with G_CONSTANT.
///
/// One G_CONSTANT is built for every entry of \p Ops, typed with the element
/// type of \p Res, and the resulting values are used, in order, as the
/// sources of the G_BUILD_VECTOR.
MachineInstrBuilder buildBuildVectorConstant(const DstOp &Res,
ArrayRef<APInt> Ops);
/// Build and insert \p Res = G_BUILD_VECTOR with \p Src replicated to fill /// Build and insert \p Res = G_BUILD_VECTOR with \p Src replicated to fill
/// the number of elements /// the number of elements
MachineInstrBuilder buildSplatVector(const DstOp &Res, MachineInstrBuilder buildSplatVector(const DstOp &Res,

View File

@ -269,13 +269,10 @@ Optional<APFloat> ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
const MachineRegisterInfo &MRI); const MachineRegisterInfo &MRI);
/// Tries to constant fold a vector binop with sources \p Op1 and \p Op2. /// Tries to constant fold a vector binop with sources \p Op1 and \p Op2.
/// If successful, returns the G_BUILD_VECTOR representing the folded vector /// Returns an empty vector on failure.
/// constant. \p MIB should have an insertion point already set to create new SmallVector<APInt> ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
/// G_CONSTANT instructions as needed. const Register Op2,
Register ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, const MachineRegisterInfo &MRI);
const Register Op2,
const MachineRegisterInfo &MRI,
MachineIRBuilder &MIB);
Optional<APInt> ConstantFoldExtOp(unsigned Opcode, const Register Op1, Optional<APInt> ConstantFoldExtOp(unsigned Opcode, const Register Op1,
uint64_t Imm, const MachineRegisterInfo &MRI); uint64_t Imm, const MachineRegisterInfo &MRI);

View File

@ -203,10 +203,10 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
if (SrcTy.isVector()) { if (SrcTy.isVector()) {
// Try to constant fold vector constants. // Try to constant fold vector constants.
Register VecCst = ConstantFoldVectorBinop( SmallVector<APInt> VecCst = ConstantFoldVectorBinop(
Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI(), *this); Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI());
if (VecCst) if (!VecCst.empty())
return buildCopy(DstOps[0], VecCst); return buildBuildVectorConstant(DstOps[0], VecCst);
break; break;
} }

View File

@ -664,6 +664,17 @@ MachineInstrBuilder MachineIRBuilder::buildBuildVector(const DstOp &Res,
return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
} }
/// Emit a G_BUILD_VECTOR defining \p Res whose sources are freshly built
/// G_CONSTANTs, one per entry of \p Ops, each typed with the element type of
/// \p Res.
MachineInstrBuilder
MachineIRBuilder::buildBuildVectorConstant(const DstOp &Res,
                                           ArrayRef<APInt> Ops) {
  // Every scalar constant shares the element type of the destination vector.
  const LLT ScalarTy = Res.getLLTTy(*getMRI()).getElementType();

  SmallVector<SrcOp> ConstantSrcs;
  ConstantSrcs.reserve(Ops.size());
  for (const APInt &Value : Ops) {
    MachineInstrBuilder Cst = buildConstant(ScalarTy, Value);
    ConstantSrcs.push_back(Cst);
  }

  return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, ConstantSrcs);
}
MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res, MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res,
const SrcOp &Src) { const SrcOp &Src) {
SmallVector<SrcOp, 8> TmpVec(Res.getLLTTy(*getMRI()).getNumElements(), Src); SmallVector<SrcOp, 8> TmpVec(Res.getLLTTy(*getMRI()).getNumElements(), Src);

View File

@ -608,33 +608,27 @@ Optional<APFloat> llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
return None; return None;
} }
Register llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, SmallVector<APInt>
const Register Op2, llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
const MachineRegisterInfo &MRI, const Register Op2,
MachineIRBuilder &MIB) { const MachineRegisterInfo &MRI) {
auto *SrcVec2 = getOpcodeDef<GBuildVector>(Op2, MRI); auto *SrcVec2 = getOpcodeDef<GBuildVector>(Op2, MRI);
if (!SrcVec2) if (!SrcVec2)
return Register(); return SmallVector<APInt>();
auto *SrcVec1 = getOpcodeDef<GBuildVector>(Op1, MRI); auto *SrcVec1 = getOpcodeDef<GBuildVector>(Op1, MRI);
if (!SrcVec1) if (!SrcVec1)
return Register(); return SmallVector<APInt>();
const LLT EltTy = MRI.getType(SrcVec1->getSourceReg(0)); SmallVector<APInt> FoldedElements;
SmallVector<Register, 16> FoldedElements;
for (unsigned Idx = 0, E = SrcVec1->getNumSources(); Idx < E; ++Idx) { for (unsigned Idx = 0, E = SrcVec1->getNumSources(); Idx < E; ++Idx) {
auto MaybeCst = ConstantFoldBinOp(Opcode, SrcVec1->getSourceReg(Idx), auto MaybeCst = ConstantFoldBinOp(Opcode, SrcVec1->getSourceReg(Idx),
SrcVec2->getSourceReg(Idx), MRI); SrcVec2->getSourceReg(Idx), MRI);
if (!MaybeCst) if (!MaybeCst)
return Register(); return SmallVector<APInt>();
auto FoldedCstReg = MIB.buildConstant(EltTy, *MaybeCst).getReg(0); FoldedElements.push_back(*MaybeCst);
FoldedElements.emplace_back(FoldedCstReg);
} }
// Create the new vector constant. return FoldedElements;
auto CstVec =
MIB.buildBuildVector(MRI.getType(SrcVec1->getReg(0)), FoldedElements);
return CstVec.getReg(0);
} }
bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,

View File

@ -12,8 +12,7 @@ define amdgpu_kernel void @constant_fold_vector_add() {
; CHECK-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64), [[C2]](s64), [[C2]](s64) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64), [[C2]](s64), [[C2]](s64)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY [[BUILD_VECTOR1]](<4 x s64>) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s64>), [[C1]](p1) :: (store (<4 x s64>) into `<4 x i64> addrspace(1)* null`, addrspace 1)
; CHECK-NEXT: G_STORE [[COPY]](<4 x s64>), [[C1]](p1) :: (store (<4 x s64>) into `<4 x i64> addrspace(1)* null`, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0 ; CHECK-NEXT: S_ENDPGM 0
entry: entry:
%add = add <4 x i64> zeroinitializer, zeroinitializer %add = add <4 x i64> zeroinitializer, zeroinitializer

View File

@ -184,10 +184,9 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64_constant(<2 x i32 ad
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64) ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C3]](s64) ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C3]](s64)
; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s64>) = COPY [[BUILD_VECTOR4]](<2 x s64>) ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR4]](<2 x s64>)
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[COPY8]](<2 x s64>) ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY8]](<2 x p1>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY9]](<2 x p1>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)