forked from OSchip/llvm-project
[demangler] Initial support for the new Rust mangling scheme
Add demangling support for a small subset of the new Rust mangling scheme, with complete support planned as follow-up work. Integrate Rust demangling into llvm-cxxfilt and use llvm-cxxfilt for end-to-end testing. The new Rust mangling scheme uses "_R" as a prefix, which makes it easy to disambiguate it from other mangling schemes. The public API is modeled after __cxa_demangle / llvm::itaniumDemangle, since potential candidates for further integration use those. Reviewed By: dblaikie Differential Revision: https://reviews.llvm.org/D101444
This commit is contained in:
parent
73332d73e1
commit
7310403e3c
|
@ -57,6 +57,9 @@ char *microsoftDemangle(const char *mangled_name, size_t *n_read,
|
|||
char *buf, size_t *n_buf,
|
||||
int *status, MSDemangleFlags Flags = MSDF_None);
|
||||
|
||||
// Demangles a Rust v0 mangled symbol. The API follows that of __cxa_demangle.
|
||||
char *rustDemangle(const char *MangledName, char *Buf, size_t *N, int *Status);
|
||||
|
||||
/// Attempt to demangle a string using different demangling schemes.
|
||||
/// The function uses heuristics to determine which demangling scheme to use.
|
||||
/// \param MangledName - reference to string to demangle.
|
||||
|
|
|
@ -0,0 +1,118 @@
|
|||
//===--- RustDemangle.h -----------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_DEMANGLE_RUSTDEMANGLE_H
|
||||
#define LLVM_DEMANGLE_RUSTDEMANGLE_H
|
||||
|
||||
#include "llvm/Demangle/DemangleConfig.h"
|
||||
#include "llvm/Demangle/StringView.h"
|
||||
#include "llvm/Demangle/Utility.h"
|
||||
|
||||
namespace llvm {
|
||||
namespace rust_demangle {
|
||||
|
||||
using llvm::itanium_demangle::OutputStream;
|
||||
using llvm::itanium_demangle::StringView;
|
||||
|
||||
struct Identifier {
|
||||
StringView Name;
|
||||
bool Punycode;
|
||||
|
||||
bool empty() const { return Name.empty(); }
|
||||
};
|
||||
|
||||
class Demangler {
|
||||
// Maximum recursion level. Used to avoid stack overflow.
|
||||
size_t MaxRecursionLevel;
|
||||
// Current recursion level.
|
||||
size_t RecursionLevel;
|
||||
|
||||
// Input string that is being demangled with "_R" prefix removed.
|
||||
StringView Input;
|
||||
// Position in the input string.
|
||||
size_t Position;
|
||||
|
||||
// True if an error occurred.
|
||||
bool Error;
|
||||
|
||||
public:
|
||||
// Demangled output.
|
||||
OutputStream Output;
|
||||
|
||||
Demangler(size_t MaxRecursionLevel = 500);
|
||||
|
||||
bool demangle(StringView MangledName);
|
||||
|
||||
private:
|
||||
void demanglePath();
|
||||
|
||||
Identifier parseIdentifier();
|
||||
void parseOptionalBase62Number(char Tag);
|
||||
uint64_t parseBase62Number();
|
||||
uint64_t parseDecimalNumber();
|
||||
|
||||
void print(StringView S) {
|
||||
if (Error)
|
||||
return;
|
||||
|
||||
Output += S;
|
||||
}
|
||||
|
||||
char look() const {
|
||||
if (Error || Position >= Input.size())
|
||||
return 0;
|
||||
|
||||
return Input[Position];
|
||||
}
|
||||
|
||||
char consume() {
|
||||
if (Error || Position >= Input.size()) {
|
||||
Error = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return Input[Position++];
|
||||
}
|
||||
|
||||
bool consumeIf(char Prefix) {
|
||||
if (Error || Position >= Input.size() || Input[Position] != Prefix)
|
||||
return false;
|
||||
|
||||
Position += 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Computes A + B. When computation wraps around sets the error and returns
|
||||
/// false. Otherwise assigns the result to A and returns true.
|
||||
bool addAssign(uint64_t &A, const uint64_t B) {
|
||||
if (A > std::numeric_limits<uint64_t>::max() - B) {
|
||||
Error = true;
|
||||
return false;
|
||||
}
|
||||
|
||||
A += B;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Computes A * B. When computation wraps around sets the error and returns
|
||||
/// false. Otherwise assigns the result to A and returns true.
|
||||
bool mulAssign(uint64_t &A, const uint64_t B) {
|
||||
if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
|
||||
Error = true;
|
||||
return false;
|
||||
}
|
||||
|
||||
A *= B;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace rust_demangle
|
||||
} // namespace llvm
|
||||
|
||||
#endif
|
|
@ -3,6 +3,7 @@ add_llvm_component_library(LLVMDemangle
|
|||
ItaniumDemangle.cpp
|
||||
MicrosoftDemangle.cpp
|
||||
MicrosoftDemangleNodes.cpp
|
||||
RustDemangle.cpp
|
||||
|
||||
ADDITIONAL_HEADER_DIRS
|
||||
"${LLVM_MAIN_INCLUDE_DIR}/llvm/Demangle"
|
||||
|
|
|
@ -0,0 +1,276 @@
|
|||
//===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines a demangler for Rust v0 mangled symbols as specified in
|
||||
// https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Demangle/RustDemangle.h"
|
||||
#include "llvm/Demangle/Demangle.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
|
||||
using namespace llvm;
|
||||
using namespace rust_demangle;
|
||||
|
||||
char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N,
|
||||
int *Status) {
|
||||
if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) {
|
||||
if (Status != nullptr)
|
||||
*Status = demangle_invalid_args;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Return early if mangled name doesn't look like a Rust symbol.
|
||||
StringView Mangled(MangledName);
|
||||
if (!Mangled.startsWith("_R")) {
|
||||
if (Status != nullptr)
|
||||
*Status = demangle_invalid_mangled_name;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Demangler D;
|
||||
if (!initializeOutputStream(nullptr, nullptr, D.Output, 1024)) {
|
||||
if (Status != nullptr)
|
||||
*Status = demangle_memory_alloc_failure;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (!D.demangle(Mangled)) {
|
||||
if (Status != nullptr)
|
||||
*Status = demangle_invalid_mangled_name;
|
||||
std::free(D.Output.getBuffer());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
D.Output += '\0';
|
||||
char *Demangled = D.Output.getBuffer();
|
||||
size_t DemangledLen = D.Output.getCurrentPosition();
|
||||
|
||||
if (Buf != nullptr) {
|
||||
if (DemangledLen <= *N) {
|
||||
std::memcpy(Buf, Demangled, DemangledLen);
|
||||
std::free(Demangled);
|
||||
Demangled = Buf;
|
||||
} else {
|
||||
std::free(Buf);
|
||||
}
|
||||
}
|
||||
|
||||
if (N != nullptr)
|
||||
*N = DemangledLen;
|
||||
|
||||
if (Status != nullptr)
|
||||
*Status = demangle_success;
|
||||
|
||||
return Demangled;
|
||||
}
|
||||
|
||||
// Creates a demangler that allows at most MaxRecursionLevel nested path
// productions. The remaining parsing state is set up by demangle().
Demangler::Demangler(size_t MaxRecursionLevel)
    : MaxRecursionLevel(MaxRecursionLevel) {
}
|
||||
|
||||
/// Returns true if C is an ASCII decimal digit: <0-9>.
static inline bool isDigit(const char C) {
  return C >= '0' && C <= '9';
}

/// Returns true if C is an ASCII lowercase letter: <a-z>.
static inline bool isLower(const char C) {
  return C >= 'a' && C <= 'z';
}

/// Returns true if C is an ASCII uppercase letter: <A-Z>.
static inline bool isUpper(const char C) {
  return C >= 'A' && C <= 'Z';
}

/// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
static inline bool isValid(const char C) {
  if (C == '_')
    return true;
  return isDigit(C) || isLower(C) || isUpper(C);
}
|
||||
|
||||
// Demangles Rust v0 mangled symbol. Returns true when successful, and false
|
||||
// otherwise. The demangled symbol is stored in Output field. It is
|
||||
// responsibility of the caller to free the memory behind the output stream.
|
||||
//
|
||||
// <symbol-name> = "_R" <path> [<instantiating-crate>]
|
||||
bool Demangler::demangle(StringView Mangled) {
|
||||
Position = 0;
|
||||
Error = false;
|
||||
RecursionLevel = 0;
|
||||
|
||||
if (!Mangled.consumeFront("_R")) {
|
||||
Error = true;
|
||||
return false;
|
||||
}
|
||||
Input = Mangled;
|
||||
|
||||
demanglePath();
|
||||
|
||||
// FIXME parse optional <instantiating-crate>.
|
||||
|
||||
if (Position != Input.size())
|
||||
Error = true;
|
||||
|
||||
return !Error;
|
||||
}
|
||||
|
||||
// <path> = "C" <identifier> // crate root
|
||||
// | "M" <impl-path> <type> // <T> (inherent impl)
|
||||
// | "X" <impl-path> <type> <path> // <T as Trait> (trait impl)
|
||||
// | "Y" <type> <path> // <T as Trait> (trait definition)
|
||||
// | "N" <ns> <path> <identifier> // ...::ident (nested path)
|
||||
// | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
|
||||
// | <backref>
|
||||
// <identifier> = [<disambiguator>] <undisambiguated-identifier>
|
||||
// <ns> = "C" // closure
|
||||
// | "S" // shim
|
||||
// | <A-Z> // other special namespaces
|
||||
// | <a-z> // internal namespaces
|
||||
void Demangler::demanglePath() {
|
||||
if (Error || RecursionLevel >= MaxRecursionLevel) {
|
||||
Error = true;
|
||||
return;
|
||||
}
|
||||
RecursionLevel += 1;
|
||||
|
||||
switch (consume()) {
|
||||
case 'C': {
|
||||
parseOptionalBase62Number('s');
|
||||
Identifier Ident = parseIdentifier();
|
||||
print(Ident.Name);
|
||||
break;
|
||||
}
|
||||
case 'N': {
|
||||
char NS = consume();
|
||||
if (!isLower(NS) && !isUpper(NS)) {
|
||||
Error = true;
|
||||
break;
|
||||
}
|
||||
demanglePath();
|
||||
|
||||
parseOptionalBase62Number('s');
|
||||
Identifier Ident = parseIdentifier();
|
||||
|
||||
if (!Ident.empty()) {
|
||||
// FIXME print special namespaces:
|
||||
// * "C" closures
|
||||
// * "S" shim
|
||||
print("::");
|
||||
print(Ident.Name);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
// FIXME parse remaining productions.
|
||||
Error = true;
|
||||
break;
|
||||
}
|
||||
|
||||
RecursionLevel -= 1;
|
||||
}
|
||||
|
||||
// <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
|
||||
Identifier Demangler::parseIdentifier() {
  const bool IsPunycode = consumeIf('u');
  const uint64_t Length = parseDecimalNumber();

  // Underscore resolves the ambiguity when identifier starts with a decimal
  // digit or another underscore.
  consumeIf('_');

  // Reject identifiers that claim more bytes than remain in the input.
  if (Error || Length > Input.size() - Position) {
    Error = true;
    return {};
  }

  const StringView Name = Input.substr(Position, Length);
  Position += Length;

  // Every byte of the identifier must be a valid mangled character.
  for (auto It = Name.begin(); It != Name.end(); ++It) {
    if (!isValid(*It)) {
      Error = true;
      return {};
    }
  }

  return {Name, IsPunycode};
}
|
||||
|
||||
// Parses optional base 62 number. The presence of a number is determined using
|
||||
// Tag.
|
||||
void Demangler::parseOptionalBase62Number(char Tag) {
|
||||
// Parsing result is currently unused.
|
||||
if (consumeIf(Tag))
|
||||
parseBase62Number();
|
||||
}
|
||||
|
||||
// Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
|
||||
// "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
|
||||
// "1_" encodes 2, etc.
|
||||
//
|
||||
// <base-62-number> = {<0-9a-zA-Z>} "_"
|
||||
uint64_t Demangler::parseBase62Number() {
|
||||
if (consumeIf('_'))
|
||||
return 0;
|
||||
|
||||
uint64_t Value = 0;
|
||||
|
||||
while (true) {
|
||||
uint64_t Digit;
|
||||
char C = consume();
|
||||
|
||||
if (C == '_') {
|
||||
break;
|
||||
} else if (isDigit(C)) {
|
||||
Digit = C - '0';
|
||||
} else if (isLower(C)) {
|
||||
Digit = 10 + (C - 'a');
|
||||
} else if (isUpper(C)) {
|
||||
Digit = 10 + 26 + (C - 'A');
|
||||
} else {
|
||||
Error = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!mulAssign(Value, 62))
|
||||
return 0;
|
||||
|
||||
if (!addAssign(Value, Digit))
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!addAssign(Value, 1))
|
||||
return 0;
|
||||
|
||||
return Value;
|
||||
}
|
||||
|
||||
// Parses a decimal number that had been encoded without any leading zeros.
|
||||
//
|
||||
// <decimal-number> = "0"
|
||||
// | <1-9> {<0-9>}
|
||||
uint64_t Demangler::parseDecimalNumber() {
|
||||
char C = look();
|
||||
if (!isDigit(C)) {
|
||||
Error = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (C == '0') {
|
||||
consume();
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t Value = 0;
|
||||
|
||||
while (isDigit(look())) {
|
||||
if (!mulAssign(Value, 10)) {
|
||||
Error = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t D = consume() - '0';
|
||||
if (!addAssign(Value, D))
|
||||
return 0;
|
||||
}
|
||||
|
||||
return Value;
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
RUN: llvm-cxxfilt -n < %s | FileCheck --match-full-lines %s
|
||||
|
||||
CHECK: a::main
|
||||
_RNvC1a4main
|
||||
|
||||
CHECK: hello::rust
|
||||
_RNvCshGpAVYOtgW1_5hello4rust
|
||||
|
||||
CHECK: a::b::c
|
||||
_RNvNvC1a1b1c
|
||||
|
||||
; Invalid mangled characters
|
||||
|
||||
CHECK: _RNvC2a.1c
|
||||
_RNvC2a.1c
|
||||
|
||||
CHECK: _RNvC2a$1c
|
||||
_RNvC2a$1c
|
||||
|
||||
; Invalid identifier length (UINT64_MAX + 3, which happens to be ok after a wraparound).
|
||||
|
||||
CHECK: _RNvC2ab18446744073709551618xy
|
||||
_RNvC2ab18446744073709551618xy
|
||||
|
||||
; Mangling scheme includes an optional encoding version. When present it would
|
||||
; indicate an encoding we don't support yet. Check that it is rejected:
|
||||
|
||||
CHECK: _R0NvC1a4main
|
||||
_R0NvC1a4main
|
||||
|
||||
; Early EOF
|
||||
|
||||
CHECK: _RNv
|
||||
_RNv
|
||||
|
||||
CHECK: _RNvC
|
||||
_RNvC
|
||||
|
||||
CHECK: _RNvC1a5main
|
||||
_RNvC1a5main
|
||||
|
||||
CHECK: _RNvC1a20abc
|
||||
_RNvC1a20abc
|
|
@ -97,6 +97,11 @@ static std::string demangle(const std::string &Mangled) {
|
|||
Undecorated = itaniumDemangle(DecoratedStr + 6, nullptr, nullptr, &Status);
|
||||
}
|
||||
|
||||
if (!Undecorated &&
|
||||
(DecoratedLength >= 2 && strncmp(DecoratedStr, "_R", 2) == 0)) {
|
||||
Undecorated = rustDemangle(DecoratedStr, nullptr, nullptr, &Status);
|
||||
}
|
||||
|
||||
std::string Result(Undecorated ? Prefix + Undecorated : Mangled);
|
||||
free(Undecorated);
|
||||
return Result;
|
||||
|
|
|
@ -7,5 +7,6 @@ add_llvm_unittest(DemangleTests
|
|||
DemangleTest.cpp
|
||||
ItaniumDemangleTest.cpp
|
||||
PartialDemangleTest.cpp
|
||||
RustDemangleTest.cpp
|
||||
StringViewTest.cpp
|
||||
)
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
//===------------------ RustDemangleTest.cpp ------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Demangle/Demangle.h"
|
||||
#include "gmock/gmock.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
// A valid symbol demangles regardless of which optional out-parameters are
// supplied.
TEST(RustDemangle, Success) {
  // No status, no length.
  char *Result = llvm::rustDemangle("_RNvC1a4main", nullptr, nullptr, nullptr);
  EXPECT_STREQ(Result, "a::main");
  std::free(Result);

  // With status.
  int Status = 0;
  Result = llvm::rustDemangle("_RNvC1a4main", nullptr, nullptr, &Status);
  EXPECT_EQ(Status, llvm::demangle_success);
  EXPECT_STREQ(Result, "a::main");
  std::free(Result);

  // With status and length. The reported length counts the terminating NUL.
  size_t Length = 0;
  Result = llvm::rustDemangle("_RNvC1a4main", nullptr, &Length, &Status);
  EXPECT_EQ(Status, llvm::demangle_success);
  EXPECT_EQ(Length, 8u);
  EXPECT_STREQ(Result, "a::main");
  std::free(Result);
}
|
||||
|
||||
// Malformed symbols yield nullptr and demangle_invalid_mangled_name.
TEST(RustDemangle, Invalid) {
  int Status = 0;

  // Invalid prefix.
  char *Result = llvm::rustDemangle("_ABCDEF", nullptr, nullptr, &Status);
  EXPECT_EQ(Status, llvm::demangle_invalid_mangled_name);
  EXPECT_EQ(Result, nullptr);

  // Correct prefix but still invalid.
  Result = llvm::rustDemangle("_RRR", nullptr, nullptr, &Status);
  EXPECT_EQ(Status, llvm::demangle_invalid_mangled_name);
  EXPECT_EQ(Result, nullptr);
}
|
||||
|
||||
// Supplying an output buffer without its length is rejected as invalid
// arguments; the test frees the buffer itself afterwards.
TEST(RustDemangle, OutputBufferWithoutLength) {
  char *Storage = static_cast<char *>(std::malloc(1024));
  ASSERT_NE(Storage, nullptr);

  int Status = 0;
  char *Result = llvm::rustDemangle("_RNvC1a4main", Storage, nullptr, &Status);

  EXPECT_EQ(Status, llvm::demangle_invalid_args);
  EXPECT_EQ(Result, nullptr);
  std::free(Storage);
}
|
||||
|
||||
// A caller-provided buffer that is large enough is reused for the result, and
// the length out-parameter is updated to the size of the result.
TEST(RustDemangle, OutputBuffer) {
  size_t N = 1024;
  char *Buffer = static_cast<char *>(std::malloc(N));
  ASSERT_NE(Buffer, nullptr);

  int Status = 0;
  char *Demangled = llvm::rustDemangle("_RNvC1a4main", Buffer, &N, &Status);

  EXPECT_EQ(Status, llvm::demangle_success);
  EXPECT_EQ(Demangled, Buffer);
  EXPECT_STREQ(Demangled, "a::main");
  // "a::main" plus the terminating NUL.
  EXPECT_EQ(N, 8u);
  std::free(Demangled);
}
|
||||
|
||||
// A caller-provided buffer that is too small is released by rustDemangle and
// replaced with a new allocation, so only the returned pointer is freed here.
// The length out-parameter is updated to the size of the result.
TEST(RustDemangle, SmallOutputBuffer) {
  size_t N = 1;
  char *Buffer = static_cast<char *>(std::malloc(N));
  ASSERT_NE(Buffer, nullptr);

  int Status = 0;
  char *Demangled = llvm::rustDemangle("_RNvC1a4main", Buffer, &N, &Status);

  EXPECT_EQ(Status, llvm::demangle_success);
  EXPECT_STREQ(Demangled, "a::main");
  // "a::main" plus the terminating NUL.
  EXPECT_EQ(N, 8u);
  std::free(Demangled);
}
|
Loading…
Reference in New Issue