forked from OSchip/llvm-project
[Demangle] Add support for D symbols back referencing
This patch adds support for identifier back referencing allowing compressed mangled names by avoiding repetitiveness. Signed-off-by: Luís Ferreira <contact@lsferreira.net> Reviewed By: dblaikie Differential Revision: https://reviews.llvm.org/D111417
This commit is contained in:
parent
669bfcf036
commit
bec08795db
|
@ -70,6 +70,41 @@ private:
|
||||||
/// \see https://dlang.org/spec/abi.html#Number .
|
/// \see https://dlang.org/spec/abi.html#Number .
|
||||||
const char *decodeNumber(const char *Mangled, unsigned long *Ret);
|
const char *decodeNumber(const char *Mangled, unsigned long *Ret);
|
||||||
|
|
||||||
|
/// Extract the back reference position from a given string.
|
||||||
|
///
|
||||||
|
/// \param Mangled string to extract the back reference position.
|
||||||
|
/// \param Ret assigned result value.
|
||||||
|
///
|
||||||
|
/// \return the remaining string on success or nullptr on failure.
|
||||||
|
///
|
||||||
|
/// \note Ret is always >= 0 on success, and unspecified on failure
|
||||||
|
///
|
||||||
|
/// \see https://dlang.org/spec/abi.html#back_ref .
|
||||||
|
/// \see https://dlang.org/spec/abi.html#NumberBackRef .
|
||||||
|
const char *decodeBackrefPos(const char *Mangled, long &Ret);
|
||||||
|
|
||||||
|
/// Extract the symbol pointed by the back reference form a given string.
|
||||||
|
///
|
||||||
|
/// \param Mangled string to extract the back reference position.
|
||||||
|
/// \param Ret assigned result value.
|
||||||
|
///
|
||||||
|
/// \return the remaining string on success or nullptr on failure.
|
||||||
|
///
|
||||||
|
/// \see https://dlang.org/spec/abi.html#back_ref .
|
||||||
|
const char *decodeBackref(const char *Mangled, const char *&Ret);
|
||||||
|
|
||||||
|
/// Extract and demangle backreferenced symbol from a given mangled symbol
|
||||||
|
/// and append it to the output string.
|
||||||
|
///
|
||||||
|
/// \param Demangled output buffer to write the demangled name.
|
||||||
|
/// \param Mangled mangled symbol to be demangled.
|
||||||
|
///
|
||||||
|
/// \return the remaining string on success or nullptr on failure.
|
||||||
|
///
|
||||||
|
/// \see https://dlang.org/spec/abi.html#back_ref .
|
||||||
|
/// \see https://dlang.org/spec/abi.html#IdentifierBackRef .
|
||||||
|
const char *parseSymbolBackref(OutputBuffer *Demangled, const char *Mangled);
|
||||||
|
|
||||||
/// Check whether it is the beginning of a symbol name.
|
/// Check whether it is the beginning of a symbol name.
|
||||||
///
|
///
|
||||||
/// \param Mangled string to extract the symbol name.
|
/// \param Mangled string to extract the symbol name.
|
||||||
|
@ -156,12 +191,108 @@ const char *Demangler::decodeNumber(const char *Mangled, unsigned long *Ret) {
|
||||||
return Mangled;
|
return Mangled;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const char *Demangler::decodeBackrefPos(const char *Mangled, long &Ret) {
|
||||||
|
// Return nullptr if trying to extract something that isn't a digit
|
||||||
|
if (Mangled == nullptr || !std::isalpha(*Mangled))
|
||||||
|
return nullptr;
|
||||||
|
|
||||||
|
// Any identifier or non-basic type that has been emitted to the mangled
|
||||||
|
// symbol before will not be emitted again, but is referenced by a special
|
||||||
|
// sequence encoding the relative position of the original occurrence in the
|
||||||
|
// mangled symbol name.
|
||||||
|
// Numbers in back references are encoded with base 26 by upper case letters
|
||||||
|
// A-Z for higher digits but lower case letters a-z for the last digit.
|
||||||
|
// NumberBackRef:
|
||||||
|
// [a-z]
|
||||||
|
// [A-Z] NumberBackRef
|
||||||
|
// ^
|
||||||
|
unsigned long Val = 0;
|
||||||
|
|
||||||
|
while (std::isalpha(*Mangled)) {
|
||||||
|
// Check for overflow
|
||||||
|
if (Val > (std::numeric_limits<unsigned long>::max() - 25) / 26)
|
||||||
|
break;
|
||||||
|
|
||||||
|
Val *= 26;
|
||||||
|
|
||||||
|
if (Mangled[0] >= 'a' && Mangled[0] <= 'z') {
|
||||||
|
Val += Mangled[0] - 'a';
|
||||||
|
if ((long)Val <= 0)
|
||||||
|
break;
|
||||||
|
Ret = Val;
|
||||||
|
return Mangled + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
Val += Mangled[0] - 'A';
|
||||||
|
++Mangled;
|
||||||
|
}
|
||||||
|
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *Demangler::decodeBackref(const char *Mangled, const char *&Ret) {
|
||||||
|
assert(Mangled != nullptr && *Mangled == 'Q' && "Invalid back reference!");
|
||||||
|
Ret = nullptr;
|
||||||
|
|
||||||
|
// Position of 'Q'
|
||||||
|
const char *Qpos = Mangled;
|
||||||
|
long RefPos;
|
||||||
|
++Mangled;
|
||||||
|
|
||||||
|
Mangled = decodeBackrefPos(Mangled, RefPos);
|
||||||
|
if (Mangled == nullptr)
|
||||||
|
return nullptr;
|
||||||
|
|
||||||
|
if (RefPos > Qpos - Str)
|
||||||
|
return nullptr;
|
||||||
|
|
||||||
|
// Set the position of the back reference.
|
||||||
|
Ret = Qpos - RefPos;
|
||||||
|
|
||||||
|
return Mangled;
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *Demangler::parseSymbolBackref(OutputBuffer *Demangled,
|
||||||
|
const char *Mangled) {
|
||||||
|
// An identifier back reference always points to a digit 0 to 9.
|
||||||
|
// IdentifierBackRef:
|
||||||
|
// Q NumberBackRef
|
||||||
|
// ^
|
||||||
|
const char *Backref;
|
||||||
|
unsigned long Len;
|
||||||
|
|
||||||
|
// Get position of the back reference
|
||||||
|
Mangled = decodeBackref(Mangled, Backref);
|
||||||
|
|
||||||
|
// Must point to a simple identifier
|
||||||
|
Backref = decodeNumber(Backref, &Len);
|
||||||
|
if (Backref == nullptr || strlen(Backref) < Len)
|
||||||
|
return nullptr;
|
||||||
|
|
||||||
|
Backref = parseLName(Demangled, Backref, Len);
|
||||||
|
if (Backref == nullptr)
|
||||||
|
return nullptr;
|
||||||
|
|
||||||
|
return Mangled;
|
||||||
|
}
|
||||||
|
|
||||||
bool Demangler::isSymbolName(const char *Mangled) {
|
bool Demangler::isSymbolName(const char *Mangled) {
|
||||||
|
long Ret;
|
||||||
|
const char *Qref = Mangled;
|
||||||
|
|
||||||
if (std::isdigit(*Mangled))
|
if (std::isdigit(*Mangled))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
// TODO: Handle symbol back references and template instances.
|
// TODO: Handle template instances.
|
||||||
|
|
||||||
|
if (*Mangled != 'Q')
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
Mangled = decodeBackrefPos(Mangled + 1, Ret);
|
||||||
|
if (Mangled == nullptr || Ret > Qref - Str)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return std::isdigit(Qref[-Ret]);
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *Demangler::parseMangle(OutputBuffer *Demangled,
|
const char *Demangler::parseMangle(OutputBuffer *Demangled,
|
||||||
|
@ -237,7 +368,10 @@ const char *Demangler::parseIdentifier(OutputBuffer *Demangled,
|
||||||
if (Mangled == nullptr || *Mangled == '\0')
|
if (Mangled == nullptr || *Mangled == '\0')
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
// TODO: Parse back references and lengthless template instances.
|
if (*Mangled == 'Q')
|
||||||
|
return parseSymbolBackref(Demangled, Mangled);
|
||||||
|
|
||||||
|
// TODO: Parse lengthless template instances.
|
||||||
|
|
||||||
const char *Endptr = decodeNumber(Mangled, &Len);
|
const char *Endptr = decodeNumber(Mangled, &Len);
|
||||||
|
|
||||||
|
|
|
@ -52,4 +52,15 @@ INSTANTIATE_TEST_SUITE_P(
|
||||||
std::make_pair("_D8demangle3foo",
|
std::make_pair("_D8demangle3foo",
|
||||||
nullptr), // symbol without a type sequence.
|
nullptr), // symbol without a type sequence.
|
||||||
std::make_pair("_D8demangle3fooinvalidtypeseq",
|
std::make_pair("_D8demangle3fooinvalidtypeseq",
|
||||||
nullptr))); // invalid type sequence.
|
nullptr), // invalid type sequence.
|
||||||
|
std::make_pair(
|
||||||
|
"_D8demangle3ABCQe1ai",
|
||||||
|
"demangle.ABC.ABC.a"), // symbol back reference: `Qe` is a back
|
||||||
|
// reference for position 5, counting from e
|
||||||
|
// char, so decoding it points to `3`. Since
|
||||||
|
// `3` is a number, 3 chars get read and it
|
||||||
|
// succeeded.
|
||||||
|
std::make_pair("_D8demangle3ABCQa1ai",
|
||||||
|
nullptr), // invalid symbol back reference (recursive).
|
||||||
|
std::make_pair("_D8demangleQDXXXXXXXXXXXXx",
|
||||||
|
nullptr))); // overflow back reference position.
|
||||||
|
|
Loading…
Reference in New Issue