forked from OSchip/llvm-project
[tablegen] Emit string literals instead of char arrays
This changes the generated (Instr|Asm|Reg|Regclass)Name tables from this form:

  extern const char HexagonInstrNameData[] = {
    /* 0 */ 'G', '_', 'F', 'L', 'O', 'G', '1', '0', 0,
    /* 9 */ 'E', 'N', 'D', 'L', 'O', 'O', 'P', '0', 0,
    /* 18 */ 'V', '6', '_', 'v', 'd', 'd', '0', 0,
    /* 26 */ 'P', 'S', '_', 'v', 'd', 'd', '0', 0,
    [...]
  };

...to this:

  extern const char HexagonInstrNameData[] = {
    /* 0 */ "G_FLOG10\0"
    /* 9 */ "ENDLOOP0\0"
    /* 18 */ "V6_vdd0\0"
    /* 26 */ "PS_vdd0\0"
    [...]
  };

This should make debugging and exploration a lot easier for mortals, while providing a significant compile-time reduction for common compilers.

To avoid issues with low implementation limits, this is disabled by default for Visual Studio. To force output one way or the other, pass `--long-string-literals=<bool>` to `tablegen`.

Reviewers: mstorsjo, rnk
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D73044

A variation of this patch was originally committed in ce23515f5a and then reverted in e464b31c due to build failures.
This commit is contained in:
parent
3ed88b052b
commit
482e890d1f
|
@ -58,6 +58,14 @@ function(tablegen project ofn)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# MSVC can't support long string literals ("long" > 65534 bytes)[1], so if there's
|
||||||
|
# a possibility of generated tables being consumed by MSVC, generate arrays of
|
||||||
|
# char literals, instead. If we're cross-compiling, then conservatively assume
|
||||||
|
# that the source might be consumed by MSVC.
|
||||||
|
# [1] https://docs.microsoft.com/en-us/cpp/cpp/compiler-limits?view=vs-2017
|
||||||
|
if (MSVC AND project STREQUAL LLVM)
|
||||||
|
list(APPEND LLVM_TABLEGEN_FLAGS "--long-string-literals=0")
|
||||||
|
endif()
|
||||||
if (CMAKE_GENERATOR MATCHES "Visual Studio")
|
if (CMAKE_GENERATOR MATCHES "Visual Studio")
|
||||||
# Visual Studio has problems with llvm-tblgen's native --write-if-changed
|
# Visual Studio has problems with llvm-tblgen's native --write-if-changed
|
||||||
# behavior. Since it doesn't do restat optimizations anyway, just don't
|
# behavior. Since it doesn't do restat optimizations anyway, just don't
|
||||||
|
|
|
@ -380,9 +380,7 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Emit the string table itself.
|
// Emit the string table itself.
|
||||||
O << " static const char AsmStrs[] = {\n";
|
StringTable.emitStringLiteralDef(O, " static const char AsmStrs[]");
|
||||||
StringTable.emit(O, printChar);
|
|
||||||
O << " };\n\n";
|
|
||||||
|
|
||||||
// Emit the lookup tables in pieces to minimize wasted bytes.
|
// Emit the lookup tables in pieces to minimize wasted bytes.
|
||||||
unsigned BytesNeeded = ((OpcodeInfoBits - BitsLeft) + 7) / 8;
|
unsigned BytesNeeded = ((OpcodeInfoBits - BitsLeft) + 7) / 8;
|
||||||
|
@ -537,9 +535,8 @@ emitRegisterNameString(raw_ostream &O, StringRef AltName,
|
||||||
}
|
}
|
||||||
|
|
||||||
StringTable.layout();
|
StringTable.layout();
|
||||||
O << " static const char AsmStrs" << AltName << "[] = {\n";
|
StringTable.emitStringLiteralDef(O, Twine(" static const char AsmStrs") +
|
||||||
StringTable.emit(O, printChar);
|
AltName + "[]");
|
||||||
O << " };\n\n";
|
|
||||||
|
|
||||||
O << " static const " << getMinimalTypeForRange(StringTable.size() - 1, 32)
|
O << " static const " << getMinimalTypeForRange(StringTable.size() - 1, 32)
|
||||||
<< " RegAsmOffset" << AltName << "[] = {";
|
<< " RegAsmOffset" << AltName << "[] = {";
|
||||||
|
|
|
@ -569,9 +569,8 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
|
||||||
|
|
||||||
// Emit the array of instruction names.
|
// Emit the array of instruction names.
|
||||||
InstrNames.layout();
|
InstrNames.layout();
|
||||||
OS << "extern const char " << TargetName << "InstrNameData[] = {\n";
|
InstrNames.emitStringLiteralDef(OS, Twine("extern const char ") + TargetName +
|
||||||
InstrNames.emit(OS, printChar);
|
"InstrNameData[]");
|
||||||
OS << "};\n\n";
|
|
||||||
|
|
||||||
OS << "extern const unsigned " << TargetName <<"InstrNameIndices[] = {";
|
OS << "extern const unsigned " << TargetName <<"InstrNameIndices[] = {";
|
||||||
Num = 0;
|
Num = 0;
|
||||||
|
|
|
@ -992,9 +992,8 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
|
||||||
|
|
||||||
// Emit the string table.
|
// Emit the string table.
|
||||||
RegStrings.layout();
|
RegStrings.layout();
|
||||||
OS << "extern const char " << TargetName << "RegStrings[] = {\n";
|
RegStrings.emitStringLiteralDef(OS, Twine("extern const char ") + TargetName +
|
||||||
RegStrings.emit(OS, printChar);
|
"RegStrings[]");
|
||||||
OS << "};\n\n";
|
|
||||||
|
|
||||||
OS << "extern const MCRegisterDesc " << TargetName
|
OS << "extern const MCRegisterDesc " << TargetName
|
||||||
<< "RegDesc[] = { // Descriptors\n";
|
<< "RegDesc[] = { // Descriptors\n";
|
||||||
|
@ -1065,9 +1064,8 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
|
||||||
OS << "} // end anonymous namespace\n\n";
|
OS << "} // end anonymous namespace\n\n";
|
||||||
|
|
||||||
RegClassStrings.layout();
|
RegClassStrings.layout();
|
||||||
OS << "extern const char " << TargetName << "RegClassStrings[] = {\n";
|
RegClassStrings.emitStringLiteralDef(
|
||||||
RegClassStrings.emit(OS, printChar);
|
OS, Twine("extern const char ") + TargetName + "RegClassStrings[]");
|
||||||
OS << "};\n\n";
|
|
||||||
|
|
||||||
OS << "extern const MCRegisterClass " << TargetName
|
OS << "extern const MCRegisterClass " << TargetName
|
||||||
<< "MCRegisterClasses[] = {\n";
|
<< "MCRegisterClasses[] = {\n";
|
||||||
|
|
|
@ -15,6 +15,7 @@
|
||||||
#ifndef LLVM_UTILS_TABLEGEN_SEQUENCETOOFFSETTABLE_H
|
#ifndef LLVM_UTILS_TABLEGEN_SEQUENCETOOFFSETTABLE_H
|
||||||
#define LLVM_UTILS_TABLEGEN_SEQUENCETOOFFSETTABLE_H
|
#define LLVM_UTILS_TABLEGEN_SEQUENCETOOFFSETTABLE_H
|
||||||
|
|
||||||
|
#include "llvm/Support/CommandLine.h"
|
||||||
#include "llvm/Support/raw_ostream.h"
|
#include "llvm/Support/raw_ostream.h"
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
@ -23,6 +24,61 @@
|
||||||
#include <map>
|
#include <map>
|
||||||
|
|
||||||
namespace llvm {
|
namespace llvm {
|
||||||
|
extern llvm::cl::opt<bool> EmitLongStrLiterals;
|
||||||
|
|
||||||
|
// Helper function for SequenceToOffsetTable<string>.
|
||||||
|
static inline void printStrLitEscChar(raw_ostream &OS, char C) {
|
||||||
|
const char *Escapes[] = {
|
||||||
|
"\\000", "\\001", "\\002", "\\003", "\\004", "\\005", "\\006", "\\007",
|
||||||
|
"\\010", "\\t", "\\n", "\\013", "\\014", "\\r", "\\016", "\\017",
|
||||||
|
"\\020", "\\021", "\\022", "\\023", "\\024", "\\025", "\\026", "\\027",
|
||||||
|
"\\030", "\\031", "\\032", "\\033", "\\034", "\\035", "\\036", "\\037",
|
||||||
|
" ", "!", "\\\"", "#", "$", "%", "&", "'",
|
||||||
|
"(", ")", "*", "+", ",", "-", ".", "/",
|
||||||
|
"0", "1", "2", "3", "4", "5", "6", "7",
|
||||||
|
"8", "9", ":", ";", "<", "=", ">", "?",
|
||||||
|
"@", "A", "B", "C", "D", "E", "F", "G",
|
||||||
|
"H", "I", "J", "K", "L", "M", "N", "O",
|
||||||
|
"P", "Q", "R", "S", "T", "U", "V", "W",
|
||||||
|
"X", "Y", "Z", "[", "\\\\", "]", "^", "_",
|
||||||
|
"`", "a", "b", "c", "d", "e", "f", "g",
|
||||||
|
"h", "i", "j", "k", "l", "m", "n", "o",
|
||||||
|
"p", "q", "r", "s", "t", "u", "v", "w",
|
||||||
|
"x", "y", "z", "{", "|", "}", "~", "\\177",
|
||||||
|
"\\200", "\\201", "\\202", "\\203", "\\204", "\\205", "\\206", "\\207",
|
||||||
|
"\\210", "\\211", "\\212", "\\213", "\\214", "\\215", "\\216", "\\217",
|
||||||
|
"\\220", "\\221", "\\222", "\\223", "\\224", "\\225", "\\226", "\\227",
|
||||||
|
"\\230", "\\231", "\\232", "\\233", "\\234", "\\235", "\\236", "\\237",
|
||||||
|
"\\240", "\\241", "\\242", "\\243", "\\244", "\\245", "\\246", "\\247",
|
||||||
|
"\\250", "\\251", "\\252", "\\253", "\\254", "\\255", "\\256", "\\257",
|
||||||
|
"\\260", "\\261", "\\262", "\\263", "\\264", "\\265", "\\266", "\\267",
|
||||||
|
"\\270", "\\271", "\\272", "\\273", "\\274", "\\275", "\\276", "\\277",
|
||||||
|
"\\300", "\\301", "\\302", "\\303", "\\304", "\\305", "\\306", "\\307",
|
||||||
|
"\\310", "\\311", "\\312", "\\313", "\\314", "\\315", "\\316", "\\317",
|
||||||
|
"\\320", "\\321", "\\322", "\\323", "\\324", "\\325", "\\326", "\\327",
|
||||||
|
"\\330", "\\331", "\\332", "\\333", "\\334", "\\335", "\\336", "\\337",
|
||||||
|
"\\340", "\\341", "\\342", "\\343", "\\344", "\\345", "\\346", "\\347",
|
||||||
|
"\\350", "\\351", "\\352", "\\353", "\\354", "\\355", "\\356", "\\357",
|
||||||
|
"\\360", "\\361", "\\362", "\\363", "\\364", "\\365", "\\366", "\\367",
|
||||||
|
"\\370", "\\371", "\\372", "\\373", "\\374", "\\375", "\\376", "\\377"};
|
||||||
|
|
||||||
|
static_assert(sizeof Escapes / sizeof Escapes[0] ==
|
||||||
|
std::numeric_limits<unsigned char>::max() + 1,
|
||||||
|
"unsupported character type");
|
||||||
|
OS << Escapes[static_cast<unsigned char>(C)];
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Writes \p C to \p OS as a single element of a char-array initializer.
///
/// Alphanumeric and punctuation characters are written as a quoted character
/// literal (with `\` and `'` backslash-escaped); anything else is written as
/// its unsigned numeric value.
static inline void printChar(raw_ostream &OS, char C) {
  const unsigned char Byte(C);
  const bool IsPrintable = isalnum(Byte) || ispunct(Byte);

  // Non-printable bytes are emitted as a plain decimal number.
  if (!IsPrintable) {
    OS << unsigned(Byte);
    return;
  }

  // Printable bytes become a character literal: 'x', '\\' or '\''.
  const bool NeedsBackslash = (C == '\\' || C == '\'');
  OS << '\'';
  if (NeedsBackslash)
    OS << '\\';
  OS << C;
  OS << '\'';
}
|
||||||
|
|
||||||
/// SequenceToOffsetTable - Collect a number of terminated sequences of T.
|
/// SequenceToOffsetTable - Collect a number of terminated sequences of T.
|
||||||
/// Compute the layout of a table that contains all the sequences, possibly by
|
/// Compute the layout of a table that contains all the sequences, possibly by
|
||||||
|
@ -108,6 +164,37 @@ public:
|
||||||
return I->second + (I->first.size() - Seq.size());
|
return I->second + (I->first.size() - Seq.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// `emitStringLiteralDef` - Print out the table as the body of an array
|
||||||
|
/// initializer, where each element is a C string literal terminated by
|
||||||
|
/// `\0`. Falls back to emitting a comma-separated integer list if
|
||||||
|
/// `EmitLongStrLiterals` is false
|
||||||
|
void emitStringLiteralDef(raw_ostream &OS, const llvm::Twine &Decl) const {
|
||||||
|
assert(Entries && "Call layout() before emitStringLiteralDef()");
|
||||||
|
if (EmitLongStrLiterals) {
|
||||||
|
OS << "\n#ifdef __GNUC__\n"
|
||||||
|
<< "#pragma GCC diagnostic push\n"
|
||||||
|
<< "#pragma GCC diagnostic ignored \"-Woverlength-strings\"\n"
|
||||||
|
<< "#endif\n"
|
||||||
|
<< Decl << " = {\n";
|
||||||
|
} else {
|
||||||
|
OS << Decl << " = {\n";
|
||||||
|
emit(OS, printChar, "0");
|
||||||
|
OS << "\n};\n\n";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
for (auto I : Seqs) {
|
||||||
|
OS << " /* " << I.second << " */ \"";
|
||||||
|
for (auto C : I.first) {
|
||||||
|
printStrLitEscChar(OS, C);
|
||||||
|
}
|
||||||
|
OS << "\\0\"\n";
|
||||||
|
}
|
||||||
|
OS << "};\n"
|
||||||
|
<< "#ifdef __GNUC__\n"
|
||||||
|
<< "#pragma GCC diagnostic pop\n"
|
||||||
|
<< "#endif\n\n";
|
||||||
|
}
|
||||||
|
|
||||||
/// emit - Print out the table as the body of an array initializer.
|
/// emit - Print out the table as the body of an array initializer.
|
||||||
/// Use the Print function to print elements.
|
/// Use the Print function to print elements.
|
||||||
void emit(raw_ostream &OS,
|
void emit(raw_ostream &OS,
|
||||||
|
@ -127,19 +214,6 @@ public:
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Helper function for SequenceToOffsetTable<string>.
|
|
||||||
static inline void printChar(raw_ostream &OS, char C) {
|
|
||||||
unsigned char UC(C);
|
|
||||||
if (isalnum(UC) || ispunct(UC)) {
|
|
||||||
OS << '\'';
|
|
||||||
if (C == '\\' || C == '\'')
|
|
||||||
OS << '\\';
|
|
||||||
OS << C << '\'';
|
|
||||||
} else {
|
|
||||||
OS << unsigned(UC);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // end namespace llvm
|
} // end namespace llvm
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -60,6 +60,12 @@ namespace llvm {
|
||||||
/// Storage for TimeRegionsOpt as a global so that backends aren't required to
|
/// Storage for TimeRegionsOpt as a global so that backends aren't required to
|
||||||
/// include CommandLine.h
|
/// include CommandLine.h
|
||||||
bool TimeRegions = false;
|
bool TimeRegions = false;
|
||||||
|
/// Command-line option `--long-string-literals`. Hidden, and initialized to
/// true. When true, large string tables are emitted as string literals; when
/// false, they are emitted as comma-separated char literals.
cl::opt<bool> EmitLongStrLiterals(
|
||||||
|
"long-string-literals",
|
||||||
|
cl::desc("when emitting large string tables, prefer string literals over "
|
||||||
|
"comma-separated char literals. This can be a readability and "
|
||||||
|
"compile-time performance win, but upsets some compilers"),
|
||||||
|
cl::Hidden, cl::init(true));
|
||||||
} // end namespace llvm
|
} // end namespace llvm
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
Loading…
Reference in New Issue