[PR] Fdata: Escape whitespaces in symbol names

Summary:
This patch is part of preparation for golang support. The golang symbols
might have spaces in the name (for example "type..eq.[10]interface {}").
Since fdata uses spaces as a field separator such names brakes the fdata
format, so we need to escape whitespaces and backslashes in symbol names
using the backslash character.

Vladislav Khmelevsky,
Advanced Software Technology Lab, Huawei

(cherry picked from FBD29999491)
This commit is contained in:
Vladislav Khmelevsky 2021-06-29 19:54:08 +03:00 committed by Maksim Panchenko
parent b64de07569
commit 2a5790b670
11 changed files with 174 additions and 18 deletions

View File

@ -2201,9 +2201,8 @@ DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
OutFile << (Loc.IsSymbol ? "4 " : "3 "); OutFile << (Loc.IsSymbol ? "4 " : "3 ");
else else
OutFile << (Loc.IsSymbol ? "1 " : "0 "); OutFile << (Loc.IsSymbol ? "1 " : "0 ");
OutFile << (Loc.Name.empty() ? "[unknown]" : Loc.Name) << " " OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name))
<< Twine::utohexstr(Loc.Offset) << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator;
<< FieldSeparator;
}; };
uint64_t BranchValues = 0; uint64_t BranchValues = 0;

View File

@ -11,10 +11,10 @@
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "BinaryFunction.h"
#include "DataReader.h" #include "DataReader.h"
#include "BinaryFunction.h"
#include "Passes/MCF.h" #include "Passes/MCF.h"
#include "Utils.h"
#include "llvm/Support/CommandLine.h" #include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h" #include "llvm/Support/Debug.h"
#include <map> #include <map>
@ -62,10 +62,11 @@ bool hasVolatileName(const BinaryFunction &BF) {
return false; return false;
} }
/// Return standard name of the function possibly renamed by BOLT. /// Return standard escaped name of the function possibly renamed by BOLT.
StringRef normalizeName(StringRef Name) { std::string normalizeName(StringRef NameRef) {
// Strip "PG." prefix used for globalized locals. // Strip "PG." prefix used for globalized locals.
return Name.startswith("PG.") ? Name.substr(2) : Name; NameRef = NameRef.startswith("PG.") ? NameRef.substr(2) : NameRef;
return getEscapedName(NameRef);
} }
} // anonymous namespace } // anonymous namespace
@ -859,15 +860,31 @@ bool DataReader::checkAndConsumeNewLine() {
} }
ErrorOr<StringRef> DataReader::parseString(char EndChar, bool EndNl) { ErrorOr<StringRef> DataReader::parseString(char EndChar, bool EndNl) {
std::string EndChars(1, EndChar); if (EndChar == '\\') {
if (EndNl) reportError("EndChar could not be backslash");
EndChars.push_back('\n');
size_t StringEnd = ParsingBuf.find_first_of(EndChars);
if (StringEnd == StringRef::npos || StringEnd == 0) {
reportError("malformed field");
return make_error_code(llvm::errc::io_error); return make_error_code(llvm::errc::io_error);
} }
std::string EndChars(1, EndChar);
EndChars.push_back('\\');
if (EndNl)
EndChars.push_back('\n');
size_t StringEnd = 0;
do {
StringEnd = ParsingBuf.find_first_of(EndChars, StringEnd);
if (StringEnd == StringRef::npos ||
(StringEnd == 0 && ParsingBuf[StringEnd] != '\\')) {
reportError("malformed field");
return make_error_code(llvm::errc::io_error);
}
if (ParsingBuf[StringEnd] != '\\')
break;
StringEnd += 2;
} while (1);
StringRef Str = ParsingBuf.substr(0, StringEnd); StringRef Str = ParsingBuf.substr(0, StringEnd);
// If EndNl was set and nl was found instead of EndChar, do not consume the // If EndNl was set and nl was found instead of EndChar, do not consume the
@ -1317,8 +1334,7 @@ fetchMapEntriesRegex(
// Do a reverse order iteration since the name in profile has a higher chance // Do a reverse order iteration since the name in profile has a higher chance
// of matching a name at the end of the list. // of matching a name at the end of the list.
for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) { for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) {
StringRef Name = *FI; std::string Name = normalizeName(*FI);
Name = normalizeName(Name);
const Optional<StringRef> LTOCommonName = getLTOCommonName(Name); const Optional<StringRef> LTOCommonName = getLTOCommonName(Name);
if (LTOCommonName) { if (LTOCommonName) {
auto I = LTOCommonNameMap.find(*LTOCommonName); auto I = LTOCommonNameMap.find(*LTOCommonName);

View File

@ -11,6 +11,7 @@
#include "Instrumentation.h" #include "Instrumentation.h"
#include "ParallelUtilities.h" #include "ParallelUtilities.h"
#include "RuntimeLibs/InstrumentationRuntimeLibrary.h" #include "RuntimeLibs/InstrumentationRuntimeLibrary.h"
#include "Utils.h"
#include "llvm/Support/CommandLine.h" #include "llvm/Support/CommandLine.h"
#include <stack> #include <stack>
@ -90,7 +91,7 @@ uint32_t Instrumentation::getFunctionNameIndex(const BinaryFunction &Function) {
return Iter->second; return Iter->second;
size_t Idx = Summary->StringTable.size(); size_t Idx = Summary->StringTable.size();
FuncToStringIdx.emplace(std::make_pair(&Function, Idx)); FuncToStringIdx.emplace(std::make_pair(&Function, Idx));
Summary->StringTable.append(std::string(Function.getOneName())); Summary->StringTable.append(getEscapedName(Function.getOneName()));
Summary->StringTable.append(1, '\0'); Summary->StringTable.append(1, '\0');
return Idx; return Idx;
} }

View File

@ -48,6 +48,26 @@ void check_error(Error E, Twine Message) {
}); });
} }
std::string getEscapedName(const StringRef &Name) {
std::string Output = Name.str();
for (size_t I = 0; I < Output.size(); ++I) {
if (Output[I] == ' ' || Output[I] == '\\')
Output.insert(I++, 1, '\\');
}
return Output;
}
std::string getUnescapedName(const StringRef &Name) {
std::string Output = Name.str();
for (size_t I = 0; I < Output.size(); ++I) {
if (Output[I] == '\\')
Output.erase(I++, 1);
}
return Output;
}
Optional<uint8_t> readDWARFExpressionTargetReg(StringRef ExprBytes) { Optional<uint8_t> readDWARFExpressionTargetReg(StringRef ExprBytes) {
uint8_t Opcode = ExprBytes[0]; uint8_t Opcode = ExprBytes[0];
if (Opcode == dwarf::DW_CFA_def_cfa_expression) if (Opcode == dwarf::DW_CFA_def_cfa_expression)

View File

@ -35,6 +35,12 @@ void check_error(std::error_code EC, StringRef Message);
void check_error(Error E, Twine Message); void check_error(Error E, Twine Message);
/// Return the name with escaped whitespace and backslash characters
std::string getEscapedName(const StringRef &Name);
/// Return the unescaped name
std::string getUnescapedName(const StringRef &Name);
// Determines which register a given DWARF expression is being assigned to. // Determines which register a given DWARF expression is being assigned to.
// If the expression is defining the CFA, return NoneType. // If the expression is defining the CFA, return NoneType.
Optional<uint8_t> readDWARFExpressionTargetReg(StringRef ExprBytes); Optional<uint8_t> readDWARFExpressionTargetReg(StringRef ExprBytes);

View File

@ -10,6 +10,7 @@ set(BOLT_TEST_PARAMS
) )
list(APPEND BOLT_TEST_DEPS list(APPEND BOLT_TEST_DEPS
llc
llvm-config llvm-config
FileCheck count not FileCheck count not
llvm-bolt llvm-bolt
@ -22,6 +23,7 @@ list(APPEND BOLT_TEST_DEPS
llvm-strip llvm-strip
llvm-dwarfdump llvm-dwarfdump
llvm-mc llvm-mc
llvm-objcopy
) )
add_custom_target(bolt-test-depends DEPENDS ${BOLT_TEST_DEPS}) add_custom_target(bolt-test-depends DEPENDS ${BOLT_TEST_DEPS})

View File

@ -0,0 +1,3 @@
symb_w_whitespace symb whitespace
symb_backslash_b symb backslash\
static_symb_backslash_b static symb backslash\

View File

@ -0,0 +1,6 @@
# PREAGR: B #main# #static symb backslash\# 1 0
# PREAGR: B #static symb backslash\# #symb whitespace# 1 0
# PREAGR: B #main# #symb whitespace# 1 0
# PREAGR: B #main# #symb backslash\# 1 0
# PREAGR: B #main# #symb backslash\# 1 0
# PREAGR: B #symb backslash\# #symb whitespace# 2 0

View File

@ -0,0 +1,101 @@
@var = dso_local global i32 0, align 4
; Function Attrs: noinline nounwind optnone uwtable
define dso_local void @symb_w_whitespace() #0 {
store volatile i32 1, i32* @var, align 4
ret void
}
; Function Attrs: noinline nounwind optnone uwtable
define dso_local void @symb_backslash_b() #0 {
call void @symb_w_whitespace()
store volatile i32 2, i32* @var, align 4
ret void
}
; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 @main() #0 {
%1 = alloca i32, align 4
%2 = alloca i32, align 4
store i32 0, i32* %1, align 4
call void @static_symb_backslash_b()
call void @symb_w_whitespace()
store i32 0, i32* %2, align 4
br label %3
3: ; preds = %7, %0
%4 = load i32, i32* %2, align 4
%5 = icmp slt i32 %4, 2
br i1 %5, label %6, label %10
6: ; preds = %3
call void @symb_backslash_b()
br label %7
7: ; preds = %6
%8 = load i32, i32* %2, align 4
%9 = add nsw i32 %8, 1
store i32 %9, i32* %2, align 4
br label %3
10: ; preds = %3
%11 = load i32, i32* %1, align 4
ret i32 %11
}
; Function Attrs: noinline nounwind optnone uwtable
define internal void @static_symb_backslash_b() #0 {
call void @symb_w_whitespace()
store volatile i32 3, i32* @var, align 4
ret void
}
; REQUIRES: system-linux
; RUN: llc %s -o %t.s
; RUN: %host_cc %cflags -O0 %t.s -o %t.exe -Wl,-q
; RUN: llvm-objcopy --redefine-syms=%p/Inputs/fdata-escape-chars-syms.txt %t.exe
;
; RUN: llvm-bolt %t.exe -o %t.exe.instrumented -instrument \
; RUN: -instrumentation-file=%t.fdata
; RUN: %t.exe.instrumented
; RUN: cat %t.fdata | \
; RUN: FileCheck --check-prefix="FDATA_CHECK" %s
; RUN: llvm-bolt %t.exe -o %t.fdata.exe -data %t.fdata -print-finalized | \
; RUN: FileCheck --check-prefix="INSTR_CHECK" %s
;
; RUN: link_fdata %p/Inputs/fdata-escape-chars.txt %t.exe %t.pre "PREAGR"
; RUN: perf2bolt %t.exe -o %t.pre.fdata -pa -p %t.pre
; RUN: cat %t.pre.fdata | FileCheck --check-prefix="PREAGR_FDATA_CHECK" %s
; RUN: llvm-bolt %t.exe -o %t.pre.fdata.exe -data %t.pre.fdata -print-finalized | \
; RUN: FileCheck --check-prefix="PREAGR_CHECK" %s
; FDATA_CHECK: 1 symb\ backslash\\ {{([[:xdigit:]]+)}} 1 symb\ whitespace 0 0 2
; FDATA_CHECK: 1 main {{([[:xdigit:]]+)}} 1 symb\ whitespace 0 0 1
; FDATA_CHECK: 1 main {{([[:xdigit:]]+)}} 1 symb\ backslash\\ 0 0 2
; INSTR_CHECK: Binary Function "symb whitespace"
; INSTR_CHECK: Exec Count : 4
; INSTR_CHECK: Binary Function "symb backslash\"
; INSTR_CHECK: Exec Count : 2
; INSTR_CHECK: {{([[:xdigit:]]+)}}: callq "symb whitespace" # Count: 2
; INSTR_CHECK: Binary Function "main"
; INSTR_CHECK: Exec Count : 1
; INSTR_CHECK: {{([[:xdigit:]]+)}}: callq "symb whitespace" # Count: 1
; INSTR_CHECK: {{([[:xdigit:]]+)}}: callq "symb backslash\" # Count: 2
; INSTR_CHECK: Binary Function "static symb backslash\/1(*2)"
; INSTR_CHECK: Exec Count : 1
; INSTR_CHECK: {{([[:xdigit:]]+)}}: callq "symb whitespace" # Count: 1
; PREAGR_FDATA_CHECK: 1 symb\ backslash\\ 0 1 symb\ whitespace 0 0 2
; PREAGR_FDATA_CHECK: 1 main 0 1 static\ symb\ backslash\\/1 0 0 1
; PREAGR_FDATA_CHECK: 1 main 0 1 symb\ whitespace 0 0 1
; PREAGR_FDATA_CHECK: 1 main 0 1 symb\ backslash\\ 0 0 2
; PREAGR_FDATA_CHECK: 1 static\ symb\ backslash\\/1 0 1 symb\ whitespace 0 0 1
; PREAGR_CHECK: Binary Function "symb whitespace"
; PREAGR_CHECK-DAG: Exec Count : 4
; PREAGR_CHECK: Binary Function "symb backslash\"
; PREAGR_CHECK-DAG: Exec Count : 2
; PREAGR_CHECK: Binary Function "static symb backslash\/1(*2)"
; PREAGR_CHECK-DAG: Exec Count : 1

View File

@ -7,7 +7,7 @@ mapfile -t symbols < <(nm --defined-only "$2")
for line in "${symbols[@]}"; do for line in "${symbols[@]}"; do
val=$(echo $line | cut -d' ' -f1) val=$(echo $line | cut -d' ' -f1)
symname=$(echo $line | cut -d' ' -f3) symname=$(echo $line | awk '{ $1=$2=""; print $0 }' | sed 's/^[ \t]*//')
if [ -z "$symname" ]; then if [ -z "$symname" ]; then
continue continue
fi fi

View File

@ -51,6 +51,7 @@ tool_dirs = [config.llvm_tools_dir,
config.test_source_root] config.test_source_root]
tools = [ tools = [
ToolSubst('llc', unresolved='fatal'),
ToolSubst('llvm-dwarfdump', unresolved='fatal'), ToolSubst('llvm-dwarfdump', unresolved='fatal'),
ToolSubst('llvm-bolt', unresolved='fatal'), ToolSubst('llvm-bolt', unresolved='fatal'),
ToolSubst('perf2bolt', unresolved='fatal'), ToolSubst('perf2bolt', unresolved='fatal'),
@ -58,6 +59,7 @@ tools = [
ToolSubst('llvm-mc', unresolved='fatal'), ToolSubst('llvm-mc', unresolved='fatal'),
ToolSubst('llvm-nm', unresolved='fatal'), ToolSubst('llvm-nm', unresolved='fatal'),
ToolSubst('llvm-objdump', unresolved='fatal'), ToolSubst('llvm-objdump', unresolved='fatal'),
ToolSubst('llvm-objcopy', unresolved='fatal'),
ToolSubst('llvm-strip', unresolved='fatal'), ToolSubst('llvm-strip', unresolved='fatal'),
ToolSubst('link_fdata', command=FindTool('link_fdata.sh'), unresolved='fatal'), ToolSubst('link_fdata', command=FindTool('link_fdata.sh'), unresolved='fatal'),
] ]