Add support for building the compactiontable for bytecode files. This shrinks

the bytecode file for 176.gcc by about 200K (10%), and 254.gap by about 167K,
a 25% reduction.  There is still a lot of room for improvement in the encoding
of the compaction table.

llvm-svn: 10913
This commit is contained in:
Chris Lattner 2004-01-18 21:07:07 +00:00
parent 23e583b5dc
commit ca1f8afe85
1 changed files with 222 additions and 55 deletions

View File

@ -36,6 +36,7 @@ using namespace llvm;
SlotCalculator::SlotCalculator(const Module *M, bool buildBytecodeInfo) {
BuildBytecodeInfo = buildBytecodeInfo;
ModuleContainsAllFunctionConstants = false;
TheModule = M;
// Preload table... Make sure that all of the primitive types are in the table
@ -53,6 +54,7 @@ SlotCalculator::SlotCalculator(const Module *M, bool buildBytecodeInfo) {
SlotCalculator::SlotCalculator(const Function *M, bool buildBytecodeInfo) {
BuildBytecodeInfo = buildBytecodeInfo;
ModuleContainsAllFunctionConstants = false;
TheModule = M ? M->getParent() : 0;
// Preload table... Make sure that all of the primitive types are in the table
@ -70,6 +72,17 @@ SlotCalculator::SlotCalculator(const Function *M, bool buildBytecodeInfo) {
incorporateFunction(M); // Start out in incorporated state
}
unsigned SlotCalculator::getGlobalSlot(const Value *V) const {
assert(!CompactionTable.empty() &&
"This method can only be used when compaction is enabled!");
if (const ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(V))
V = CPR->getValue();
std::map<const Value*, unsigned>::const_iterator I = NodeMap.find(V);
assert(I != NodeMap.end() && "Didn't find entry!");
return I->second;
}
// processModule - Process all of the module level function declarations and
// types that are available.
@ -127,23 +140,36 @@ void SlotCalculator::processModule() {
}
}
#if 0
// FIXME: Empirically, this causes the bytecode files to get BIGGER, because
// it explodes the operand size numbers to be bigger than can be handled
// compactly, which offsets the ~40% savings in constant sizes. Whoops.
// If we are emitting a bytecode file, scan all of the functions for their
// constants, which allows us to emit more compact modules. This is optional,
// and is just used to compactify the constants used by different functions
// together.
//
// This functionality is completely optional for the bytecode writer, but
// tends to produce smaller bytecode files. This should not be used in the
// future by clients that want to, for example, build and emit functions on
// the fly. For now, however, it is unconditionally enabled when building
// bytecode information.
//
if (BuildBytecodeInfo) {
ModuleContainsAllFunctionConstants = true;
SC_DEBUG("Inserting function constants:\n");
for (Module::const_iterator F = TheModule->begin(), E = TheModule->end();
F != E; ++F)
for_each(constant_begin(F), constant_end(F),
bind_obj(this, &SlotCalculator::getOrCreateSlot));
F != E; ++F) {
for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I){
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
if (isa<Constant>(I->getOperand(op)))
getOrCreateSlot(I->getOperand(op));
getOrCreateSlot(I->getType());
if (const VANextInst *VAN = dyn_cast<VANextInst>(*I))
getOrCreateSlot(VAN->getArgType());
}
processSymbolTableConstants(&F->getSymbolTable());
}
}
#endif
// Insert constants that are named at module level into the slot pool so that
// the module symbol table can refer to them...
@ -220,27 +246,32 @@ void SlotCalculator::incorporateFunction(const Function *F) {
SC_DEBUG("begin processFunction!\n");
// Save the Table state before we process the function...
for (unsigned i = 0; i < Table.size(); ++i)
ModuleLevel.push_back(Table[i].size());
SC_DEBUG("Inserting function arguments\n");
// If we emitted all of the function constants, build a compaction table.
if (BuildBytecodeInfo && ModuleContainsAllFunctionConstants)
buildCompactionTable(F);
else {
// Save the Table state before we process the function...
for (unsigned i = 0, e = Table.size(); i != e; ++i)
ModuleLevel.push_back(Table[i].size());
}
// Iterate over function arguments, adding them to the value table...
for(Function::const_aiterator I = F->abegin(), E = F->aend(); I != E; ++I)
getOrCreateSlot(I);
// Iterate over all of the instructions in the function, looking for constant
// values that are referenced. Add these to the value pools before any
// nonconstant values. This will be turned into the constant pool for the
// bytecode writer.
//
if (BuildBytecodeInfo) { // Assembly writer does not need this!
// Emit all of the constants that are being used by the instructions in the
// function...
if (BuildBytecodeInfo && // Assembly writer does not need this!
!ModuleContainsAllFunctionConstants) {
// Iterate over all of the instructions in the function, looking for
// constant values that are referenced. Add these to the value pools
// before any nonconstant values. This will be turned into the constant
// pool for the bytecode writer.
//
// Emit all of the constants that are being used by the instructions in
// the function...
for_each(constant_begin(F), constant_end(F),
bind_obj(this, &SlotCalculator::getOrCreateSlot));
bind_obj(this, &SlotCalculator::getOrCreateSlot));
// If there is a symbol table, it is possible that the user has names for
// constants that are not being used. In this case, we will have problems
// if we don't emit the constants now, because otherwise we will get
@ -271,42 +302,169 @@ void SlotCalculator::purgeFunction() {
SC_DEBUG("begin purgeFunction!\n");
// First, remove values from existing type planes
for (unsigned i = 0; i < NumModuleTypes; ++i) {
unsigned ModuleSize = ModuleLevel[i]; // Size of plane before function came
TypePlane &CurPlane = Table[i];
//SC_DEBUG("Processing Plane " <<i<< " of size " << CurPlane.size() <<"\n");
while (CurPlane.size() != ModuleSize) {
//SC_DEBUG(" Removing [" << i << "] Value=" << CurPlane.back() << "\n");
std::map<const Value *, unsigned>::iterator NI =
NodeMap.find(CurPlane.back());
assert(NI != NodeMap.end() && "Node not in nodemap?");
NodeMap.erase(NI); // Erase from nodemap
CurPlane.pop_back(); // Shrink plane
// First, free the compaction map if used.
CompactionNodeMap.clear();
// Next, remove values from existing type planes
for (unsigned i = 0; i != NumModuleTypes; ++i)
if (i >= CompactionTable.size() || CompactionTable[i].empty()) {
unsigned ModuleSize = ModuleLevel[i];// Size of plane before function came
TypePlane &CurPlane = Table[i];
while (CurPlane.size() != ModuleSize) {
std::map<const Value *, unsigned>::iterator NI =
NodeMap.find(CurPlane.back());
assert(NI != NodeMap.end() && "Node not in nodemap?");
NodeMap.erase(NI); // Erase from nodemap
CurPlane.pop_back(); // Shrink plane
}
}
}
// We don't need this state anymore, free it up.
ModuleLevel.clear();
// Next, remove any type planes defined by the function...
while (NumModuleTypes != Table.size()) {
TypePlane &Plane = Table.back();
SC_DEBUG("Removing Plane " << (Table.size()-1) << " of size "
<< Plane.size() << "\n");
while (Plane.size()) {
NodeMap.erase(NodeMap.find(Plane.back())); // Erase from nodemap
Plane.pop_back(); // Shrink plane
if (!CompactionTable.empty()) {
CompactionTable.clear();
} else {
// FIXME: this will require adjustment when we don't compact everything.
// Finally, remove any type planes defined by the function...
while (NumModuleTypes != Table.size()) {
TypePlane &Plane = Table.back();
SC_DEBUG("Removing Plane " << (Table.size()-1) << " of size "
<< Plane.size() << "\n");
while (Plane.size()) {
NodeMap.erase(NodeMap.find(Plane.back())); // Erase from nodemap
Plane.pop_back(); // Shrink plane
}
Table.pop_back(); // Nuke the plane, we don't like it.
}
Table.pop_back(); // Nuke the plane, we don't like it.
}
SC_DEBUG("end purgeFunction!\n");
}
static inline bool hasNullValue(unsigned TyID) {
return TyID != Type::LabelTyID && TyID != Type::TypeTyID &&
TyID != Type::VoidTyID;
}
/// getOrCreateCompactionTableSlot - This method is used to build up the initial
/// approximation of the compaction table.
unsigned SlotCalculator::getOrCreateCompactionTableSlot(const Value *V) {
std::map<const Value*, unsigned>::iterator I =
CompactionNodeMap.lower_bound(V);
if (I != CompactionNodeMap.end() && I->first == V)
return I->second; // Already exists?
// Make sure the type is in the table.
unsigned Ty = getOrCreateCompactionTableSlot(V->getType());
if (CompactionTable.size() <= Ty)
CompactionTable.resize(Ty+1);
assert(!isa<Type>(V) || ModuleLevel.empty());
TypePlane &TyPlane = CompactionTable[Ty];
// Make sure to insert the null entry if the thing we are inserting is not a
// null constant.
if (TyPlane.empty() && hasNullValue(V->getType()->getPrimitiveID())) {
Value *ZeroInitializer = Constant::getNullValue(V->getType());
if (V != ZeroInitializer) {
TyPlane.push_back(ZeroInitializer);
CompactionNodeMap[ZeroInitializer] = 0;
}
}
unsigned SlotNo = TyPlane.size();
TyPlane.push_back(V);
CompactionNodeMap.insert(std::make_pair(V, SlotNo));
return SlotNo;
}
/// buildCompactionTable - Since all of the function constants and types are
/// stored in the module-level constant table, we don't need to emit a function
/// constant table. Also due to this, the indices for various constants and
/// types might be very large in large programs. In order to avoid blowing up
/// the size of instructions in the bytecode encoding, we build a compaction
/// table, which defines a mapping from function-local identifiers to global
/// identifiers.
void SlotCalculator::buildCompactionTable(const Function *F) {
assert(CompactionNodeMap.empty() && "Compaction table already built!");
// First step, insert the primitive types.
CompactionTable.resize(Type::TypeTyID+1);
for (unsigned i = 0; i != Type::FirstDerivedTyID; ++i) {
const Type *PrimTy = Type::getPrimitiveType((Type::PrimitiveID)i);
CompactionTable[Type::TypeTyID].push_back(PrimTy);
CompactionNodeMap[PrimTy] = i;
}
// Next, include any types used by function arguments.
for (Function::const_aiterator I = F->abegin(), E = F->aend(); I != E; ++I)
getOrCreateCompactionTableSlot(I->getType());
// Next, find all of the types and values that are referred to by the
// instructions in the program.
for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
getOrCreateCompactionTableSlot(I->getType());
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
if (isa<Constant>(I->getOperand(op)) ||
isa<GlobalValue>(I->getOperand(op)))
getOrCreateCompactionTableSlot(I->getOperand(op));
if (const VANextInst *VAN = dyn_cast<VANextInst>(*I))
getOrCreateCompactionTableSlot(VAN->getArgType());
}
const SymbolTable &ST = F->getSymbolTable();
for (SymbolTable::const_iterator I = ST.begin(), E = ST.end(); I != E; ++I)
for (SymbolTable::type_const_iterator TI = I->second.begin(),
TE = I->second.end(); TI != TE; ++TI)
if (isa<Constant>(TI->second) || isa<Type>(TI->second) ||
isa<GlobalValue>(TI->second))
getOrCreateCompactionTableSlot(TI->second);
// Now that we have all of the values in the table, and know what types are
// referenced, make sure that there is at least the zero initializer in any
// used type plane. Since the type was used, we will be emitting instructions
// to the plane even if there are no constants in it.
CompactionTable.resize(CompactionTable[Type::TypeTyID].size());
for (unsigned i = 0, e = CompactionTable.size(); i != e; ++i)
if (CompactionTable[i].empty() && i != Type::VoidTyID &&
i != Type::LabelTyID) {
const Type *Ty = cast<Type>(CompactionTable[Type::TypeTyID][i]);
getOrCreateCompactionTableSlot(Constant::getNullValue(Ty));
}
// Okay, now at this point, we have a legal compaction table. Since we want
// to emit the smallest possible binaries, we delete planes that do not NEED
// to be compacted, starting with the type plane.
// If decided not to compact anything, do not modify ModuleLevels.
if (CompactionTable.empty())
// FIXME: must update ModuleLevel.
return;
// Finally, for any planes that we have decided to compact, update the
// ModuleLevel entries to be accurate.
// FIXME: This does not yet work for partially compacted tables.
ModuleLevel.resize(CompactionTable.size());
for (unsigned i = 0, e = CompactionTable.size(); i != e; ++i)
ModuleLevel[i] = CompactionTable[i].size();
}
int SlotCalculator::getSlot(const Value *V) const {
// If there is a CompactionTable active...
if (!CompactionNodeMap.empty()) {
std::map<const Value*, unsigned>::const_iterator I =
CompactionNodeMap.find(V);
if (I != CompactionNodeMap.end())
return (int)I->second;
return -1;
}
std::map<const Value*, unsigned>::const_iterator I = NodeMap.find(V);
if (I != NodeMap.end())
return (int)I->second;
@ -327,8 +485,11 @@ int SlotCalculator::getOrCreateSlot(const Value *V) {
if (const ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(V))
return getOrCreateSlot(CPR->getValue());
if (!isa<GlobalValue>(V))
if (!isa<GlobalValue>(V)) // Initializers for globals are handled explicitly
if (const Constant *C = dyn_cast<Constant>(V)) {
assert(CompactionNodeMap.empty() &&
"All needed constants should be in the compaction map already!");
// If we are emitting a bytecode file, do not index the characters that
// make up constant strings. We emit constant strings as special
// entities that don't require their individual characters to be emitted.
@ -358,6 +519,17 @@ int SlotCalculator::insertValue(const Value *D, bool dontIgnore) {
assert(D && "Can't insert a null value!");
assert(getSlot(D) == -1 && "Value is already in the table!");
// If we are building a compaction map, and if this plane is being compacted,
// insert the value into the compaction map, not into the global map.
if (!CompactionNodeMap.empty()) {
if (D->getType() == Type::VoidTy) return -1; // Do not insert void values
assert(!isa<Type>(D) && !isa<Constant>(D) && !isa<GlobalValue>(D) &&
"Types, constants, and globals should be in global SymTab!");
// FIXME: this does not yet handle partially compacted tables yet!
return getOrCreateCompactionTableSlot(D);
}
// If this node does not contribute to a plane, or if the node has a
// name and we don't want names, then ignore the silly node... Note that types
// do need slot numbers so that we can keep track of where other values land.
@ -406,11 +578,6 @@ int SlotCalculator::insertValue(const Value *D, bool dontIgnore) {
return doInsertValue(D);
}
static inline bool hasNullValue(unsigned TyID) {
return TyID != Type::LabelTyID && TyID != Type::TypeTyID &&
TyID != Type::VoidTyID;
}
// doInsertValue - This is a small helper function to be called only
// be insertValue.
//