2013-06-05 10:32:26 +08:00
|
|
|
//===- YAML.cpp - YAMLIO utilities for object files -----------------------===//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2013-06-05 10:32:26 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This file defines utility classes for handling the YAML representation of
|
|
|
|
// object files.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2016-03-02 03:15:06 +08:00
|
|
|
#include "llvm/ObjectYAML/YAML.h"
|
2013-08-07 13:51:27 +08:00
|
|
|
#include "llvm/ADT/StringExtras.h"
|
2013-06-06 06:59:00 +08:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2013-10-12 08:55:57 +08:00
|
|
|
#include <cctype>
|
2017-07-01 09:35:55 +08:00
|
|
|
#include <cstdint>
|
2013-06-05 10:32:26 +08:00
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
2014-07-03 10:01:39 +08:00
|
|
|
/// YAML scalar output hook for BinaryRef: emits \p Val to \p Out by delegating
/// to BinaryRef::writeAsHex(). The context pointer is not used.
void yaml::ScalarTraits<yaml::BinaryRef>::output(const yaml::BinaryRef &Val,
                                                 void * /*Ctx*/,
                                                 raw_ostream &Out) {
  Val.writeAsHex(Out);
}
|
|
|
|
|
2014-07-03 10:01:39 +08:00
|
|
|
StringRef yaml::ScalarTraits<yaml::BinaryRef>::input(StringRef Scalar, void *,
|
|
|
|
yaml::BinaryRef &Val) {
|
2013-06-06 06:59:00 +08:00
|
|
|
if (Scalar.size() % 2 != 0)
|
|
|
|
return "BinaryRef hex string must contain an even number of nybbles.";
|
|
|
|
// TODO: Can we improve YAMLIO to permit a more accurate diagnostic here?
|
|
|
|
// (e.g. a caret pointing to the offending character).
|
|
|
|
for (unsigned I = 0, N = Scalar.size(); I != N; ++I)
|
[yaml::BinaryRef] Slight perf tuning (for llvm-exegesis analysis mode)
Summary:
llvm-exegesis uses this functionality to read it's benchmark dumps.
This reading of `.yaml`s takes ~60% of runtime for 14656 benchmark points (i.e. one sweep over all x86 instructions),
but only 30% of time for 3x as much benchmark points.
In particular, this `BinaryRef` appears to be an obvious pain point.
Without patch:
```
$ perf stat -r 25 ./bin/llvm-exegesis -mode=analysis -analysis-epsilon=1.0 -benchmarks-file=/tmp/benchmarks-inverse_throughput-onefull.yaml -analysis-clusters-output-file="" -analysis-inconsistencies-output-file=/tmp/clusters-orig.html
no exegesis target for x86_64-unknown-linux-gnu, using default
Parsed 14656 benchmark points
Printing sched class consistency analysis results to file '/tmp/clusters-orig.html'
...
no exegesis target for x86_64-unknown-linux-gnu, using default
Parsed 14656 benchmark points
Printing sched class consistency analysis results to file '/tmp/clusters-orig.html'
Performance counter stats for './bin/llvm-exegesis -mode=analysis -analysis-epsilon=1.0 -benchmarks-file=/tmp/benchmarks-inverse_throughput-onefull.yaml -analysis-clusters-output-file= -analysis-inconsistencies-output-file=/tmp/clusters-orig.html' (25 runs):
972.86 msec task-clock # 0.994 CPUs utilized ( +- 0.25% )
30 context-switches # 30.774 M/sec ( +- 21.74% )
0 cpu-migrations # 0.370 M/sec ( +- 67.81% )
11873 page-faults # 12211.512 M/sec ( +- 0.00% )
3898373408 cycles # 4009682.186 GHz ( +- 0.25% ) (83.12%)
360399748 stalled-cycles-frontend # 9.24% frontend cycles idle ( +- 0.54% ) (83.24%)
1099450483 stalled-cycles-backend # 28.20% backend cycles idle ( +- 0.59% ) (33.63%)
4910528820 instructions # 1.26 insn per cycle
# 0.22 stalled cycles per insn ( +- 0.13% ) (50.21%)
1111976775 branches # 1143726625.854 M/sec ( +- 0.10% ) (66.77%)
23248474 branch-misses # 2.09% of all branches ( +- 0.19% ) (83.29%)
0.97850 +- 0.00647 seconds time elapsed ( +- 0.66% )
```
With the patch:
```
$ perf stat -r 25 ./bin/llvm-exegesis -mode=analysis -analysis-epsilon=1.0 -benchmarks-file=/tmp/benchmarks-inverse_throughput-onefull.yaml -analysis-clusters-output-file="" -analysis-inconsistencies-output-file=/tmp/clusters-new.html
no exegesis target for x86_64-unknown-linux-gnu, using default
Parsed 14656 benchmark points
Printing sched class consistency analysis results to file '/tmp/clusters-new.html'
...
no exegesis target for x86_64-unknown-linux-gnu, using default
Parsed 14656 benchmark points
Printing sched class consistency analysis results to file '/tmp/clusters-new.html'
Performance counter stats for './bin/llvm-exegesis -mode=analysis -analysis-epsilon=1.0 -benchmarks-file=/tmp/benchmarks-inverse_throughput-onefull.yaml -analysis-clusters-output-file= -analysis-inconsistencies-output-file=/tmp/clusters-new.html' (25 runs):
905.29 msec task-clock # 0.999 CPUs utilized ( +- 0.11% )
15 context-switches # 16.533 M/sec ( +- 32.27% )
0 cpu-migrations # 0.000 K/sec
11873 page-faults # 13121.789 M/sec ( +- 0.00% )
3627759720 cycles # 4009283.100 GHz ( +- 0.11% ) (83.19%)
370401480 stalled-cycles-frontend # 10.21% frontend cycles idle ( +- 0.22% ) (83.19%)
1007114438 stalled-cycles-backend # 27.76% backend cycles idle ( +- 0.34% ) (33.62%)
4414014304 instructions # 1.22 insn per cycle
# 0.23 stalled cycles per insn ( +- 0.08% ) (50.36%)
1003751700 branches # 1109314021.971 M/sec ( +- 0.07% ) (66.97%)
24611010 branch-misses # 2.45% of all branches ( +- 0.10% ) (83.41%)
0.90593 +- 0.00105 seconds time elapsed ( +- 0.12% )
```
So this decreases the overall run time of llvm-exegesis analysis mode (on one sweep) by roughly -7%.
To be noted, `BinaryRef::writeAsBinary()` change is the reason for the perf changes,
usage of `llvm::isHexDigit()` instead of `isxdigit()` does not appear to have any perf impact,
i have only changed it "for symmetry".
`writeAsBinary()` change is correct, it produces identical de-hex-ified buffer, and the final output is thus identical:
```
$ sha512sum /tmp/clusters-*
db4bbd904fe8840853b589b032c5041bc060b91bcd9c27b914b56581fbc473550eea74b852238c79963b5adf2419f379e9f5db76784048b48e3937f9f3e732bf /tmp/clusters-new.html
db4bbd904fe8840853b589b032c5041bc060b91bcd9c27b914b56581fbc473550eea74b852238c79963b5adf2419f379e9f5db76784048b48e3937f9f3e732bf /tmp/clusters-orig.html
```
Reviewers: silvas, espindola, sbc100, zturner, courbet, gchatelet
Reviewed By: gchatelet
Subscribers: tschuett, RKSimon, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D57699
llvm-svn: 353282
2019-02-06 16:57:01 +08:00
|
|
|
if (!llvm::isHexDigit(Scalar[I]))
|
2013-06-06 06:59:00 +08:00
|
|
|
return "BinaryRef hex string must contain only hex digits.";
|
2014-07-03 10:01:39 +08:00
|
|
|
Val = yaml::BinaryRef(Scalar);
|
2017-07-01 09:35:55 +08:00
|
|
|
return {};
|
2013-06-05 10:32:26 +08:00
|
|
|
}
|
2013-06-06 06:59:00 +08:00
|
|
|
|
2014-07-03 10:01:39 +08:00
|
|
|
void yaml::BinaryRef::writeAsBinary(raw_ostream &OS) const {
|
2013-06-06 07:32:31 +08:00
|
|
|
if (!DataIsHexString) {
|
2013-06-06 06:59:00 +08:00
|
|
|
OS.write((const char *)Data.data(), Data.size());
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
for (unsigned I = 0, N = Data.size(); I != N; I += 2) {
|
[yaml::BinaryRef] Slight perf tuning (for llvm-exegesis analysis mode)
Summary:
llvm-exegesis uses this functionality to read it's benchmark dumps.
This reading of `.yaml`s takes ~60% of runtime for 14656 benchmark points (i.e. one sweep over all x86 instructions),
but only 30% of time for 3x as much benchmark points.
In particular, this `BinaryRef` appears to be an obvious pain point.
Without patch:
```
$ perf stat -r 25 ./bin/llvm-exegesis -mode=analysis -analysis-epsilon=1.0 -benchmarks-file=/tmp/benchmarks-inverse_throughput-onefull.yaml -analysis-clusters-output-file="" -analysis-inconsistencies-output-file=/tmp/clusters-orig.html
no exegesis target for x86_64-unknown-linux-gnu, using default
Parsed 14656 benchmark points
Printing sched class consistency analysis results to file '/tmp/clusters-orig.html'
...
no exegesis target for x86_64-unknown-linux-gnu, using default
Parsed 14656 benchmark points
Printing sched class consistency analysis results to file '/tmp/clusters-orig.html'
Performance counter stats for './bin/llvm-exegesis -mode=analysis -analysis-epsilon=1.0 -benchmarks-file=/tmp/benchmarks-inverse_throughput-onefull.yaml -analysis-clusters-output-file= -analysis-inconsistencies-output-file=/tmp/clusters-orig.html' (25 runs):
972.86 msec task-clock # 0.994 CPUs utilized ( +- 0.25% )
30 context-switches # 30.774 M/sec ( +- 21.74% )
0 cpu-migrations # 0.370 M/sec ( +- 67.81% )
11873 page-faults # 12211.512 M/sec ( +- 0.00% )
3898373408 cycles # 4009682.186 GHz ( +- 0.25% ) (83.12%)
360399748 stalled-cycles-frontend # 9.24% frontend cycles idle ( +- 0.54% ) (83.24%)
1099450483 stalled-cycles-backend # 28.20% backend cycles idle ( +- 0.59% ) (33.63%)
4910528820 instructions # 1.26 insn per cycle
# 0.22 stalled cycles per insn ( +- 0.13% ) (50.21%)
1111976775 branches # 1143726625.854 M/sec ( +- 0.10% ) (66.77%)
23248474 branch-misses # 2.09% of all branches ( +- 0.19% ) (83.29%)
0.97850 +- 0.00647 seconds time elapsed ( +- 0.66% )
```
With the patch:
```
$ perf stat -r 25 ./bin/llvm-exegesis -mode=analysis -analysis-epsilon=1.0 -benchmarks-file=/tmp/benchmarks-inverse_throughput-onefull.yaml -analysis-clusters-output-file="" -analysis-inconsistencies-output-file=/tmp/clusters-new.html
no exegesis target for x86_64-unknown-linux-gnu, using default
Parsed 14656 benchmark points
Printing sched class consistency analysis results to file '/tmp/clusters-new.html'
...
no exegesis target for x86_64-unknown-linux-gnu, using default
Parsed 14656 benchmark points
Printing sched class consistency analysis results to file '/tmp/clusters-new.html'
Performance counter stats for './bin/llvm-exegesis -mode=analysis -analysis-epsilon=1.0 -benchmarks-file=/tmp/benchmarks-inverse_throughput-onefull.yaml -analysis-clusters-output-file= -analysis-inconsistencies-output-file=/tmp/clusters-new.html' (25 runs):
905.29 msec task-clock # 0.999 CPUs utilized ( +- 0.11% )
15 context-switches # 16.533 M/sec ( +- 32.27% )
0 cpu-migrations # 0.000 K/sec
11873 page-faults # 13121.789 M/sec ( +- 0.00% )
3627759720 cycles # 4009283.100 GHz ( +- 0.11% ) (83.19%)
370401480 stalled-cycles-frontend # 10.21% frontend cycles idle ( +- 0.22% ) (83.19%)
1007114438 stalled-cycles-backend # 27.76% backend cycles idle ( +- 0.34% ) (33.62%)
4414014304 instructions # 1.22 insn per cycle
# 0.23 stalled cycles per insn ( +- 0.08% ) (50.36%)
1003751700 branches # 1109314021.971 M/sec ( +- 0.07% ) (66.97%)
24611010 branch-misses # 2.45% of all branches ( +- 0.10% ) (83.41%)
0.90593 +- 0.00105 seconds time elapsed ( +- 0.12% )
```
So this decreases the overall run time of llvm-exegesis analysis mode (on one sweep) by roughly -7%.
To be noted, `BinaryRef::writeAsBinary()` change is the reason for the perf changes,
usage of `llvm::isHexDigit()` instead of `isxdigit()` does not appear to have any perf impact,
i have only changed it "for symmetry".
`writeAsBinary()` change is correct, it produces identical de-hex-ified buffer, and the final output is thus identical:
```
$ sha512sum /tmp/clusters-*
db4bbd904fe8840853b589b032c5041bc060b91bcd9c27b914b56581fbc473550eea74b852238c79963b5adf2419f379e9f5db76784048b48e3937f9f3e732bf /tmp/clusters-new.html
db4bbd904fe8840853b589b032c5041bc060b91bcd9c27b914b56581fbc473550eea74b852238c79963b5adf2419f379e9f5db76784048b48e3937f9f3e732bf /tmp/clusters-orig.html
```
Reviewers: silvas, espindola, sbc100, zturner, courbet, gchatelet
Reviewed By: gchatelet
Subscribers: tschuett, RKSimon, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D57699
llvm-svn: 353282
2019-02-06 16:57:01 +08:00
|
|
|
uint8_t Byte = llvm::hexDigitValue(Data[I]);
|
|
|
|
Byte <<= 4;
|
|
|
|
Byte |= llvm::hexDigitValue(Data[I + 1]);
|
2013-06-06 06:59:00 +08:00
|
|
|
OS.write(Byte);
|
|
|
|
}
|
|
|
|
}
|
2013-06-06 07:47:23 +08:00
|
|
|
|
2014-07-03 10:01:39 +08:00
|
|
|
/// Write this BinaryRef to \p OS as a string of hex digits.
///
/// Nothing is written when binary_size() is zero. Data that is already a hex
/// string is copied through unchanged; otherwise every byte is emitted as two
/// hex digits, high nybble first.
void yaml::BinaryRef::writeAsHex(raw_ostream &OS) const {
  if (binary_size() == 0)
    return;

  if (!DataIsHexString) {
    // Raw bytes: expand each one into its two-digit hex form.
    for (uint8_t Octet : Data) {
      OS << hexdigit(Octet >> 4);
      OS << hexdigit(Octet & 0xf);
    }
    return;
  }

  // Already textual hex: pass it through as-is.
  OS.write(reinterpret_cast<const char *>(Data.data()), Data.size());
}
|