diff --git a/bolt/src/DataAggregator.cpp b/bolt/src/DataAggregator.cpp index d560ce11ebec..d9abf3f8e992 100644 --- a/bolt/src/DataAggregator.cpp +++ b/bolt/src/DataAggregator.cpp @@ -2201,9 +2201,8 @@ DataAggregator::writeAggregatedFile(StringRef OutputFilename) const { OutFile << (Loc.IsSymbol ? "4 " : "3 "); else OutFile << (Loc.IsSymbol ? "1 " : "0 "); - OutFile << (Loc.Name.empty() ? "[unknown]" : Loc.Name) << " " - << Twine::utohexstr(Loc.Offset) - << FieldSeparator; + OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name)) + << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator; }; uint64_t BranchValues = 0; diff --git a/bolt/src/DataReader.cpp b/bolt/src/DataReader.cpp index 42b995b19586..9ceebda344a6 100644 --- a/bolt/src/DataReader.cpp +++ b/bolt/src/DataReader.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// - -#include "BinaryFunction.h" #include "DataReader.h" +#include "BinaryFunction.h" #include "Passes/MCF.h" +#include "Utils.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include @@ -62,10 +62,11 @@ bool hasVolatileName(const BinaryFunction &BF) { return false; } -/// Return standard name of the function possibly renamed by BOLT. -StringRef normalizeName(StringRef Name) { +/// Return standard escaped name of the function possibly renamed by BOLT. +std::string normalizeName(StringRef NameRef) { // Strip "PG." prefix used for globalized locals. - return Name.startswith("PG.") ? Name.substr(2) : Name; + NameRef = NameRef.startswith("PG.") ? NameRef.substr(2) : NameRef; + return getEscapedName(NameRef); } } // anonymous namespace @@ -859,15 +860,31 @@ bool DataReader::checkAndConsumeNewLine() { } ErrorOr DataReader::parseString(char EndChar, bool EndNl) { - std::string EndChars(1, EndChar); - if (EndNl) - EndChars.push_back('\n'); - size_t StringEnd = ParsingBuf.find_first_of(EndChars); - if (StringEnd == StringRef::npos || StringEnd == 0) { - reportError("malformed field"); + if (EndChar == '\\') { + reportError("EndChar could not be backslash"); return make_error_code(llvm::errc::io_error); } + std::string EndChars(1, EndChar); + EndChars.push_back('\\'); + if (EndNl) + EndChars.push_back('\n'); + + size_t StringEnd = 0; + do { + StringEnd = ParsingBuf.find_first_of(EndChars, StringEnd); + if (StringEnd == StringRef::npos || + (StringEnd == 0 && ParsingBuf[StringEnd] != '\\')) { + reportError("malformed field"); + return make_error_code(llvm::errc::io_error); + } + + if (ParsingBuf[StringEnd] != '\\') + break; + + StringEnd += 2; + } while (1); + StringRef Str = ParsingBuf.substr(0, StringEnd); // If EndNl was set and nl was found instead of EndChar, do not consume the @@ -1317,8 +1334,7 @@ fetchMapEntriesRegex( // Do a reverse order iteration since the name in profile has a higher chance // of matching a name at the end of the list. for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) { - StringRef Name = *FI; - Name = normalizeName(Name); + std::string Name = normalizeName(*FI); const Optional LTOCommonName = getLTOCommonName(Name); if (LTOCommonName) { auto I = LTOCommonNameMap.find(*LTOCommonName); diff --git a/bolt/src/Passes/Instrumentation.cpp b/bolt/src/Passes/Instrumentation.cpp index cc882f557daa..b116ce5cecc9 100644 --- a/bolt/src/Passes/Instrumentation.cpp +++ b/bolt/src/Passes/Instrumentation.cpp @@ -11,6 +11,7 @@ #include "Instrumentation.h" #include "ParallelUtilities.h" #include "RuntimeLibs/InstrumentationRuntimeLibrary.h" +#include "Utils.h" #include "llvm/Support/CommandLine.h" #include @@ -90,7 +91,7 @@ uint32_t Instrumentation::getFunctionNameIndex(const BinaryFunction &Function) { return Iter->second; size_t Idx = Summary->StringTable.size(); FuncToStringIdx.emplace(std::make_pair(&Function, Idx)); - Summary->StringTable.append(std::string(Function.getOneName())); + Summary->StringTable.append(getEscapedName(Function.getOneName())); Summary->StringTable.append(1, '\0'); return Idx; } diff --git a/bolt/src/Utils.cpp b/bolt/src/Utils.cpp index ae8ddbd28706..75693f2e8377 100644 --- a/bolt/src/Utils.cpp +++ b/bolt/src/Utils.cpp @@ -48,6 +48,26 @@ void check_error(Error E, Twine Message) { }); } +std::string getEscapedName(const StringRef &Name) { + std::string Output = Name.str(); + for (size_t I = 0; I < Output.size(); ++I) { + if (Output[I] == ' ' || Output[I] == '\\') + Output.insert(I++, 1, '\\'); + } + + return Output; +} + +std::string getUnescapedName(const StringRef &Name) { + std::string Output = Name.str(); + for (size_t I = 0; I < Output.size(); ++I) { + if (Output[I] == '\\') + Output.erase(I++, 1); + } + + return Output; +} + Optional readDWARFExpressionTargetReg(StringRef ExprBytes) { uint8_t Opcode = ExprBytes[0]; if (Opcode == dwarf::DW_CFA_def_cfa_expression) diff --git a/bolt/src/Utils.h b/bolt/src/Utils.h index d26557150a21..d8fd6e7e308a 100644 --- a/bolt/src/Utils.h +++ b/bolt/src/Utils.h @@ -35,6 +35,12 @@ void check_error(std::error_code EC, StringRef Message); void check_error(Error E, Twine Message); +/// Return the name with escaped whitespace and backslash characters +std::string getEscapedName(const StringRef &Name); + +/// Return the unescaped name +std::string getUnescapedName(const StringRef &Name); + // Determines which register a given DWARF expression is being assigned to. // If the expression is defining the CFA, return NoneType. Optional readDWARFExpressionTargetReg(StringRef ExprBytes); diff --git a/bolt/test/CMakeLists.txt b/bolt/test/CMakeLists.txt index 84129d2250f0..b492a46d37e5 100644 --- a/bolt/test/CMakeLists.txt +++ b/bolt/test/CMakeLists.txt @@ -10,6 +10,7 @@ set(BOLT_TEST_PARAMS ) list(APPEND BOLT_TEST_DEPS + llc llvm-config FileCheck count not llvm-bolt @@ -22,6 +23,7 @@ list(APPEND BOLT_TEST_DEPS llvm-strip llvm-dwarfdump llvm-mc + llvm-objcopy ) add_custom_target(bolt-test-depends DEPENDS ${BOLT_TEST_DEPS}) diff --git a/bolt/test/X86/Inputs/fdata-escape-chars-syms.txt b/bolt/test/X86/Inputs/fdata-escape-chars-syms.txt new file mode 100644 index 000000000000..bccbc131d8b2 --- /dev/null +++ b/bolt/test/X86/Inputs/fdata-escape-chars-syms.txt @@ -0,0 +1,3 @@ +symb_w_whitespace symb whitespace +symb_backslash_b symb backslash\ +static_symb_backslash_b static symb backslash\ diff --git a/bolt/test/X86/Inputs/fdata-escape-chars.txt b/bolt/test/X86/Inputs/fdata-escape-chars.txt new file mode 100644 index 000000000000..076793dc9e42 --- /dev/null +++ b/bolt/test/X86/Inputs/fdata-escape-chars.txt @@ -0,0 +1,6 @@ +# PREAGR: B #main# #static symb backslash\# 1 0 +# PREAGR: B #static symb backslash\# #symb whitespace# 1 0 +# PREAGR: B #main# #symb whitespace# 1 0 +# PREAGR: B #main# #symb backslash\# 1 0 +# PREAGR: B #main# #symb backslash\# 1 0 +# PREAGR: B #symb backslash\# #symb whitespace# 2 0 diff --git a/bolt/test/X86/fdata-escape-chars.ll b/bolt/test/X86/fdata-escape-chars.ll new file mode 100644 index 000000000000..d9e7401421c4 --- /dev/null +++ b/bolt/test/X86/fdata-escape-chars.ll @@ -0,0 +1,101 @@ +@var = dso_local global i32 0, align 4 + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local void @symb_w_whitespace() #0 { + store volatile i32 1, i32* @var, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local void @symb_backslash_b() #0 { + call void @symb_w_whitespace() + store volatile i32 2, i32* @var, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local i32 @main() #0 { + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + store i32 0, i32* %1, align 4 + call void @static_symb_backslash_b() + call void @symb_w_whitespace() + store i32 0, i32* %2, align 4 + br label %3 + +3: ; preds = %7, %0 + %4 = load i32, i32* %2, align 4 + %5 = icmp slt i32 %4, 2 + br i1 %5, label %6, label %10 + +6: ; preds = %3 + call void @symb_backslash_b() + br label %7 + +7: ; preds = %6 + %8 = load i32, i32* %2, align 4 + %9 = add nsw i32 %8, 1 + store i32 %9, i32* %2, align 4 + br label %3 + +10: ; preds = %3 + %11 = load i32, i32* %1, align 4 + ret i32 %11 +} + +; Function Attrs: noinline nounwind optnone uwtable +define internal void @static_symb_backslash_b() #0 { + call void @symb_w_whitespace() + store volatile i32 3, i32* @var, align 4 + ret void +} + +; REQUIRES: system-linux + +; RUN: llc %s -o %t.s +; RUN: %host_cc %cflags -O0 %t.s -o %t.exe -Wl,-q +; RUN: llvm-objcopy --redefine-syms=%p/Inputs/fdata-escape-chars-syms.txt %t.exe +; +; RUN: llvm-bolt %t.exe -o %t.exe.instrumented -instrument \ +; RUN: -instrumentation-file=%t.fdata +; RUN: %t.exe.instrumented +; RUN: cat %t.fdata | \ +; RUN: FileCheck --check-prefix="FDATA_CHECK" %s +; RUN: llvm-bolt %t.exe -o %t.fdata.exe -data %t.fdata -print-finalized | \ +; RUN: FileCheck --check-prefix="INSTR_CHECK" %s +; +; RUN: link_fdata %p/Inputs/fdata-escape-chars.txt %t.exe %t.pre "PREAGR" +; RUN: perf2bolt %t.exe -o %t.pre.fdata -pa -p %t.pre +; RUN: cat %t.pre.fdata | FileCheck --check-prefix="PREAGR_FDATA_CHECK" %s +; RUN: llvm-bolt %t.exe -o %t.pre.fdata.exe -data %t.pre.fdata -print-finalized | \ +; RUN: FileCheck --check-prefix="PREAGR_CHECK" %s + +; FDATA_CHECK: 1 symb\ backslash\\ {{([[:xdigit:]]+)}} 1 symb\ whitespace 0 0 2 +; FDATA_CHECK: 1 main {{([[:xdigit:]]+)}} 1 symb\ whitespace 0 0 1 +; FDATA_CHECK: 1 main {{([[:xdigit:]]+)}} 1 symb\ backslash\\ 0 0 2 + +; INSTR_CHECK: Binary Function "symb whitespace" +; INSTR_CHECK: Exec Count : 4 +; INSTR_CHECK: Binary Function "symb backslash\" +; INSTR_CHECK: Exec Count : 2 +; INSTR_CHECK: {{([[:xdigit:]]+)}}: callq "symb whitespace" # Count: 2 +; INSTR_CHECK: Binary Function "main" +; INSTR_CHECK: Exec Count : 1 +; INSTR_CHECK: {{([[:xdigit:]]+)}}: callq "symb whitespace" # Count: 1 +; INSTR_CHECK: {{([[:xdigit:]]+)}}: callq "symb backslash\" # Count: 2 +; INSTR_CHECK: Binary Function "static symb backslash\/1(*2)" +; INSTR_CHECK: Exec Count : 1 +; INSTR_CHECK: {{([[:xdigit:]]+)}}: callq "symb whitespace" # Count: 1 + +; PREAGR_FDATA_CHECK: 1 symb\ backslash\\ 0 1 symb\ whitespace 0 0 2 +; PREAGR_FDATA_CHECK: 1 main 0 1 static\ symb\ backslash\\/1 0 0 1 +; PREAGR_FDATA_CHECK: 1 main 0 1 symb\ whitespace 0 0 1 +; PREAGR_FDATA_CHECK: 1 main 0 1 symb\ backslash\\ 0 0 2 +; PREAGR_FDATA_CHECK: 1 static\ symb\ backslash\\/1 0 1 symb\ whitespace 0 0 1 + +; PREAGR_CHECK: Binary Function "symb whitespace" +; PREAGR_CHECK-DAG: Exec Count : 4 +; PREAGR_CHECK: Binary Function "symb backslash\" +; PREAGR_CHECK-DAG: Exec Count : 2 +; PREAGR_CHECK: Binary Function "static symb backslash\/1(*2)" +; PREAGR_CHECK-DAG: Exec Count : 1 diff --git a/bolt/test/link_fdata.sh b/bolt/test/link_fdata.sh index 4f3b0c16d4ca..348dadc2957e 100755 --- a/bolt/test/link_fdata.sh +++ b/bolt/test/link_fdata.sh @@ -7,7 +7,7 @@ mapfile -t symbols < <(nm --defined-only "$2") for line in "${symbols[@]}"; do val=$(echo $line | cut -d' ' -f1) - symname=$(echo $line | cut -d' ' -f3) + symname=$(echo $line | awk '{ $1=$2=""; print $0 }' | sed 's/^[ \t]*//') if [ -z "$symname" ]; then continue fi diff --git a/bolt/test/lit.cfg.py b/bolt/test/lit.cfg.py index b00714e9c45b..5633b8092bb1 100644 --- a/bolt/test/lit.cfg.py +++ b/bolt/test/lit.cfg.py @@ -51,6 +51,7 @@ tool_dirs = [config.llvm_tools_dir, config.test_source_root] tools = [ + ToolSubst('llc', unresolved='fatal'), ToolSubst('llvm-dwarfdump', unresolved='fatal'), ToolSubst('llvm-bolt', unresolved='fatal'), ToolSubst('perf2bolt', unresolved='fatal'), @@ -58,6 +59,7 @@ tools = [ ToolSubst('llvm-mc', unresolved='fatal'), ToolSubst('llvm-nm', unresolved='fatal'), ToolSubst('llvm-objdump', unresolved='fatal'), + ToolSubst('llvm-objcopy', unresolved='fatal'), ToolSubst('llvm-strip', unresolved='fatal'), ToolSubst('link_fdata', command=FindTool('link_fdata.sh'), unresolved='fatal'), ]