From 2a5790b670a928a07f5dedab41298dbb803d001b Mon Sep 17 00:00:00 2001 From: Vladislav Khmelevsky Date: Tue, 29 Jun 2021 19:54:08 +0300 Subject: [PATCH] [PR] Fdata: Escape whitespaces in symbol names Summary: This patch is part of preparation for golang support. The golang symbols might have spaces in the name (for example "type..eq.[10]interface {}"). Since fdata uses spaces as a field separator such names brakes the fdata format, so we need to escape whitespaces and backslashes in symbol names using the backslash character. Vladislav Khmelevsky, Advanced Software Technology Lab, Huawei (cherry picked from FBD29999491) --- bolt/src/DataAggregator.cpp | 5 +- bolt/src/DataReader.cpp | 42 +++++--- bolt/src/Passes/Instrumentation.cpp | 3 +- bolt/src/Utils.cpp | 20 ++++ bolt/src/Utils.h | 6 ++ bolt/test/CMakeLists.txt | 2 + .../X86/Inputs/fdata-escape-chars-syms.txt | 3 + bolt/test/X86/Inputs/fdata-escape-chars.txt | 6 ++ bolt/test/X86/fdata-escape-chars.ll | 101 ++++++++++++++++++ bolt/test/link_fdata.sh | 2 +- bolt/test/lit.cfg.py | 2 + 11 files changed, 174 insertions(+), 18 deletions(-) create mode 100644 bolt/test/X86/Inputs/fdata-escape-chars-syms.txt create mode 100644 bolt/test/X86/Inputs/fdata-escape-chars.txt create mode 100644 bolt/test/X86/fdata-escape-chars.ll diff --git a/bolt/src/DataAggregator.cpp b/bolt/src/DataAggregator.cpp index d560ce11ebec..d9abf3f8e992 100644 --- a/bolt/src/DataAggregator.cpp +++ b/bolt/src/DataAggregator.cpp @@ -2201,9 +2201,8 @@ DataAggregator::writeAggregatedFile(StringRef OutputFilename) const { OutFile << (Loc.IsSymbol ? "4 " : "3 "); else OutFile << (Loc.IsSymbol ? "1 " : "0 "); - OutFile << (Loc.Name.empty() ? "[unknown]" : Loc.Name) << " " - << Twine::utohexstr(Loc.Offset) - << FieldSeparator; + OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name)) + << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator; }; uint64_t BranchValues = 0; diff --git a/bolt/src/DataReader.cpp b/bolt/src/DataReader.cpp index 42b995b19586..9ceebda344a6 100644 --- a/bolt/src/DataReader.cpp +++ b/bolt/src/DataReader.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// - -#include "BinaryFunction.h" #include "DataReader.h" +#include "BinaryFunction.h" #include "Passes/MCF.h" +#include "Utils.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include @@ -62,10 +62,11 @@ bool hasVolatileName(const BinaryFunction &BF) { return false; } -/// Return standard name of the function possibly renamed by BOLT. -StringRef normalizeName(StringRef Name) { +/// Return standard escaped name of the function possibly renamed by BOLT. +std::string normalizeName(StringRef NameRef) { // Strip "PG." prefix used for globalized locals. - return Name.startswith("PG.") ? Name.substr(2) : Name; + NameRef = NameRef.startswith("PG.") ? NameRef.substr(2) : NameRef; + return getEscapedName(NameRef); } } // anonymous namespace @@ -859,15 +860,31 @@ bool DataReader::checkAndConsumeNewLine() { } ErrorOr DataReader::parseString(char EndChar, bool EndNl) { - std::string EndChars(1, EndChar); - if (EndNl) - EndChars.push_back('\n'); - size_t StringEnd = ParsingBuf.find_first_of(EndChars); - if (StringEnd == StringRef::npos || StringEnd == 0) { - reportError("malformed field"); + if (EndChar == '\\') { + reportError("EndChar could not be backslash"); return make_error_code(llvm::errc::io_error); } + std::string EndChars(1, EndChar); + EndChars.push_back('\\'); + if (EndNl) + EndChars.push_back('\n'); + + size_t StringEnd = 0; + do { + StringEnd = ParsingBuf.find_first_of(EndChars, StringEnd); + if (StringEnd == StringRef::npos || + (StringEnd == 0 && ParsingBuf[StringEnd] != '\\')) { + reportError("malformed field"); + return make_error_code(llvm::errc::io_error); + } + + if (ParsingBuf[StringEnd] != '\\') + break; + + StringEnd += 2; + } while (1); + StringRef Str = ParsingBuf.substr(0, StringEnd); // If EndNl was set and nl was found instead of EndChar, do not consume the @@ -1317,8 +1334,7 @@ fetchMapEntriesRegex( // Do a reverse order iteration since the name in profile has a higher chance // of matching a name at the end of the list. for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) { - StringRef Name = *FI; - Name = normalizeName(Name); + std::string Name = normalizeName(*FI); const Optional LTOCommonName = getLTOCommonName(Name); if (LTOCommonName) { auto I = LTOCommonNameMap.find(*LTOCommonName); diff --git a/bolt/src/Passes/Instrumentation.cpp b/bolt/src/Passes/Instrumentation.cpp index cc882f557daa..b116ce5cecc9 100644 --- a/bolt/src/Passes/Instrumentation.cpp +++ b/bolt/src/Passes/Instrumentation.cpp @@ -11,6 +11,7 @@ #include "Instrumentation.h" #include "ParallelUtilities.h" #include "RuntimeLibs/InstrumentationRuntimeLibrary.h" +#include "Utils.h" #include "llvm/Support/CommandLine.h" #include @@ -90,7 +91,7 @@ uint32_t Instrumentation::getFunctionNameIndex(const BinaryFunction &Function) { return Iter->second; size_t Idx = Summary->StringTable.size(); FuncToStringIdx.emplace(std::make_pair(&Function, Idx)); - Summary->StringTable.append(std::string(Function.getOneName())); + Summary->StringTable.append(getEscapedName(Function.getOneName())); Summary->StringTable.append(1, '\0'); return Idx; } diff --git a/bolt/src/Utils.cpp b/bolt/src/Utils.cpp index ae8ddbd28706..75693f2e8377 100644 --- a/bolt/src/Utils.cpp +++ b/bolt/src/Utils.cpp @@ -48,6 +48,26 @@ void check_error(Error E, Twine Message) { }); } +std::string getEscapedName(const StringRef &Name) { + std::string Output = Name.str(); + for (size_t I = 0; I < Output.size(); ++I) { + if (Output[I] == ' ' || Output[I] == '\\') + Output.insert(I++, 1, '\\'); + } + + return Output; +} + +std::string getUnescapedName(const StringRef &Name) { + std::string Output = Name.str(); + for (size_t I = 0; I < Output.size(); ++I) { + if (Output[I] == '\\') + Output.erase(I++, 1); + } + + return Output; +} + Optional readDWARFExpressionTargetReg(StringRef ExprBytes) { uint8_t Opcode = ExprBytes[0]; if (Opcode == dwarf::DW_CFA_def_cfa_expression) diff --git a/bolt/src/Utils.h b/bolt/src/Utils.h index d26557150a21..d8fd6e7e308a 100644 --- a/bolt/src/Utils.h +++ b/bolt/src/Utils.h @@ -35,6 +35,12 @@ void check_error(std::error_code EC, StringRef Message); void check_error(Error E, Twine Message); +/// Return the name with escaped whitespace and backslash characters +std::string getEscapedName(const StringRef &Name); + +/// Return the unescaped name +std::string getUnescapedName(const StringRef &Name); + // Determines which register a given DWARF expression is being assigned to. // If the expression is defining the CFA, return NoneType. Optional readDWARFExpressionTargetReg(StringRef ExprBytes); diff --git a/bolt/test/CMakeLists.txt b/bolt/test/CMakeLists.txt index 84129d2250f0..b492a46d37e5 100644 --- a/bolt/test/CMakeLists.txt +++ b/bolt/test/CMakeLists.txt @@ -10,6 +10,7 @@ set(BOLT_TEST_PARAMS ) list(APPEND BOLT_TEST_DEPS + llc llvm-config FileCheck count not llvm-bolt @@ -22,6 +23,7 @@ list(APPEND BOLT_TEST_DEPS llvm-strip llvm-dwarfdump llvm-mc + llvm-objcopy ) add_custom_target(bolt-test-depends DEPENDS ${BOLT_TEST_DEPS}) diff --git a/bolt/test/X86/Inputs/fdata-escape-chars-syms.txt b/bolt/test/X86/Inputs/fdata-escape-chars-syms.txt new file mode 100644 index 000000000000..bccbc131d8b2 --- /dev/null +++ b/bolt/test/X86/Inputs/fdata-escape-chars-syms.txt @@ -0,0 +1,3 @@ +symb_w_whitespace symb whitespace +symb_backslash_b symb backslash\ +static_symb_backslash_b static symb backslash\ diff --git a/bolt/test/X86/Inputs/fdata-escape-chars.txt b/bolt/test/X86/Inputs/fdata-escape-chars.txt new file mode 100644 index 000000000000..076793dc9e42 --- /dev/null +++ b/bolt/test/X86/Inputs/fdata-escape-chars.txt @@ -0,0 +1,6 @@ +# PREAGR: B #main# #static symb backslash\# 1 0 +# PREAGR: B #static symb backslash\# #symb whitespace# 1 0 +# PREAGR: B #main# #symb whitespace# 1 0 +# PREAGR: B #main# #symb backslash\# 1 0 +# PREAGR: B #main# #symb backslash\# 1 0 +# PREAGR: B #symb backslash\# #symb whitespace# 2 0 diff --git a/bolt/test/X86/fdata-escape-chars.ll b/bolt/test/X86/fdata-escape-chars.ll new file mode 100644 index 000000000000..d9e7401421c4 --- /dev/null +++ b/bolt/test/X86/fdata-escape-chars.ll @@ -0,0 +1,101 @@ +@var = dso_local global i32 0, align 4 + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local void @symb_w_whitespace() #0 { + store volatile i32 1, i32* @var, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local void @symb_backslash_b() #0 { + call void @symb_w_whitespace() + store volatile i32 2, i32* @var, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local i32 @main() #0 { + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + store i32 0, i32* %1, align 4 + call void @static_symb_backslash_b() + call void @symb_w_whitespace() + store i32 0, i32* %2, align 4 + br label %3 + +3: ; preds = %7, %0 + %4 = load i32, i32* %2, align 4 + %5 = icmp slt i32 %4, 2 + br i1 %5, label %6, label %10 + +6: ; preds = %3 + call void @symb_backslash_b() + br label %7 + +7: ; preds = %6 + %8 = load i32, i32* %2, align 4 + %9 = add nsw i32 %8, 1 + store i32 %9, i32* %2, align 4 + br label %3 + +10: ; preds = %3 + %11 = load i32, i32* %1, align 4 + ret i32 %11 +} + +; Function Attrs: noinline nounwind optnone uwtable +define internal void @static_symb_backslash_b() #0 { + call void @symb_w_whitespace() + store volatile i32 3, i32* @var, align 4 + ret void +} + +; REQUIRES: system-linux + +; RUN: llc %s -o %t.s +; RUN: %host_cc %cflags -O0 %t.s -o %t.exe -Wl,-q +; RUN: llvm-objcopy --redefine-syms=%p/Inputs/fdata-escape-chars-syms.txt %t.exe +; +; RUN: llvm-bolt %t.exe -o %t.exe.instrumented -instrument \ +; RUN: -instrumentation-file=%t.fdata +; RUN: %t.exe.instrumented +; RUN: cat %t.fdata | \ +; RUN: FileCheck --check-prefix="FDATA_CHECK" %s +; RUN: llvm-bolt %t.exe -o %t.fdata.exe -data %t.fdata -print-finalized | \ +; RUN: FileCheck --check-prefix="INSTR_CHECK" %s +; +; RUN: link_fdata %p/Inputs/fdata-escape-chars.txt %t.exe %t.pre "PREAGR" +; RUN: perf2bolt %t.exe -o %t.pre.fdata -pa -p %t.pre +; RUN: cat %t.pre.fdata | FileCheck --check-prefix="PREAGR_FDATA_CHECK" %s +; RUN: llvm-bolt %t.exe -o %t.pre.fdata.exe -data %t.pre.fdata -print-finalized | \ +; RUN: FileCheck --check-prefix="PREAGR_CHECK" %s + +; FDATA_CHECK: 1 symb\ backslash\\ {{([[:xdigit:]]+)}} 1 symb\ whitespace 0 0 2 +; FDATA_CHECK: 1 main {{([[:xdigit:]]+)}} 1 symb\ whitespace 0 0 1 +; FDATA_CHECK: 1 main {{([[:xdigit:]]+)}} 1 symb\ backslash\\ 0 0 2 + +; INSTR_CHECK: Binary Function "symb whitespace" +; INSTR_CHECK: Exec Count : 4 +; INSTR_CHECK: Binary Function "symb backslash\" +; INSTR_CHECK: Exec Count : 2 +; INSTR_CHECK: {{([[:xdigit:]]+)}}: callq "symb whitespace" # Count: 2 +; INSTR_CHECK: Binary Function "main" +; INSTR_CHECK: Exec Count : 1 +; INSTR_CHECK: {{([[:xdigit:]]+)}}: callq "symb whitespace" # Count: 1 +; INSTR_CHECK: {{([[:xdigit:]]+)}}: callq "symb backslash\" # Count: 2 +; INSTR_CHECK: Binary Function "static symb backslash\/1(*2)" +; INSTR_CHECK: Exec Count : 1 +; INSTR_CHECK: {{([[:xdigit:]]+)}}: callq "symb whitespace" # Count: 1 + +; PREAGR_FDATA_CHECK: 1 symb\ backslash\\ 0 1 symb\ whitespace 0 0 2 +; PREAGR_FDATA_CHECK: 1 main 0 1 static\ symb\ backslash\\/1 0 0 1 +; PREAGR_FDATA_CHECK: 1 main 0 1 symb\ whitespace 0 0 1 +; PREAGR_FDATA_CHECK: 1 main 0 1 symb\ backslash\\ 0 0 2 +; PREAGR_FDATA_CHECK: 1 static\ symb\ backslash\\/1 0 1 symb\ whitespace 0 0 1 + +; PREAGR_CHECK: Binary Function "symb whitespace" +; PREAGR_CHECK-DAG: Exec Count : 4 +; PREAGR_CHECK: Binary Function "symb backslash\" +; PREAGR_CHECK-DAG: Exec Count : 2 +; PREAGR_CHECK: Binary Function "static symb backslash\/1(*2)" +; PREAGR_CHECK-DAG: Exec Count : 1 diff --git a/bolt/test/link_fdata.sh b/bolt/test/link_fdata.sh index 4f3b0c16d4ca..348dadc2957e 100755 --- a/bolt/test/link_fdata.sh +++ b/bolt/test/link_fdata.sh @@ -7,7 +7,7 @@ mapfile -t symbols < <(nm --defined-only "$2") for line in "${symbols[@]}"; do val=$(echo $line | cut -d' ' -f1) - symname=$(echo $line | cut -d' ' -f3) + symname=$(echo $line | awk '{ $1=$2=""; print $0 }' | sed 's/^[ \t]*//') if [ -z "$symname" ]; then continue fi diff --git a/bolt/test/lit.cfg.py b/bolt/test/lit.cfg.py index b00714e9c45b..5633b8092bb1 100644 --- a/bolt/test/lit.cfg.py +++ b/bolt/test/lit.cfg.py @@ -51,6 +51,7 @@ tool_dirs = [config.llvm_tools_dir, config.test_source_root] tools = [ + ToolSubst('llc', unresolved='fatal'), ToolSubst('llvm-dwarfdump', unresolved='fatal'), ToolSubst('llvm-bolt', unresolved='fatal'), ToolSubst('perf2bolt', unresolved='fatal'), @@ -58,6 +59,7 @@ tools = [ ToolSubst('llvm-mc', unresolved='fatal'), ToolSubst('llvm-nm', unresolved='fatal'), ToolSubst('llvm-objdump', unresolved='fatal'), + ToolSubst('llvm-objcopy', unresolved='fatal'), ToolSubst('llvm-strip', unresolved='fatal'), ToolSubst('link_fdata', command=FindTool('link_fdata.sh'), unresolved='fatal'), ]