diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index df34265ddc47..9e2d2365b653 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -1209,6 +1209,8 @@ if (LLVM_INCLUDE_BENCHMARKS) set(BENCHMARK_ENABLE_INSTALL OFF CACHE BOOL "Don't install benchmark" FORCE) set(BENCHMARK_DOWNLOAD_DEPENDENCIES OFF CACHE BOOL "Don't download dependencies" FORCE) set(BENCHMARK_ENABLE_GTEST_TESTS OFF CACHE BOOL "Disable Google Test in benchmark" FORCE) + set(BENCHMARK_ENABLE_WERROR ${LLVM_ENABLE_WERROR} CACHE BOOL + "Handle -Werror for Google Benchmark based on LLVM_ENABLE_WERROR" FORCE) # Since LLVM requires C++11 it is safe to assume that std::regex is available. set(HAVE_STD_REGEX ON CACHE BOOL "OK" FORCE) add_subdirectory(${LLVM_THIRD_PARTY_DIR}/benchmark diff --git a/third-party/benchmark/.clang-format b/third-party/benchmark/.clang-format new file mode 100644 index 000000000000..e7d00feaa08a --- /dev/null +++ b/third-party/benchmark/.clang-format @@ -0,0 +1,5 @@ +--- +Language: Cpp +BasedOnStyle: Google +PointerAlignment: Left +... diff --git a/third-party/benchmark/.clang-tidy b/third-party/benchmark/.clang-tidy new file mode 100644 index 000000000000..56938a598d1e --- /dev/null +++ b/third-party/benchmark/.clang-tidy @@ -0,0 +1,7 @@ +--- +Checks: 'clang-analyzer-*,readability-redundant-*,performance-*' +WarningsAsErrors: 'clang-analyzer-*,readability-redundant-*,performance-*' +HeaderFilterRegex: '.*' +AnalyzeTemporaryDtors: false +FormatStyle: none +User: user diff --git a/third-party/benchmark/.travis.yml b/third-party/benchmark/.travis.yml new file mode 100644 index 000000000000..8cfed3d10dab --- /dev/null +++ b/third-party/benchmark/.travis.yml @@ -0,0 +1,208 @@ +sudo: required +dist: trusty +language: cpp + +matrix: + include: + - compiler: gcc + addons: + apt: + packages: + - lcov + env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Coverage + - compiler: gcc + addons: + apt: + packages: + - g++-multilib + - libc6:i386 + env: + - COMPILER=g++ + - C_COMPILER=gcc + - BUILD_TYPE=Debug + - BUILD_32_BITS=ON + - EXTRA_FLAGS="-m32" + - compiler: gcc + addons: + apt: + packages: + - g++-multilib + - libc6:i386 + env: + - COMPILER=g++ + - C_COMPILER=gcc + - BUILD_TYPE=Release + - BUILD_32_BITS=ON + - EXTRA_FLAGS="-m32" + - compiler: gcc + env: + - INSTALL_GCC6_FROM_PPA=1 + - COMPILER=g++-6 C_COMPILER=gcc-6 BUILD_TYPE=Debug + - ENABLE_SANITIZER=1 + - EXTRA_FLAGS="-fno-omit-frame-pointer -g -O2 -fsanitize=undefined,address -fuse-ld=gold" + # Clang w/ libc++ + - compiler: clang + dist: xenial + addons: + apt: + packages: + clang-3.8 + env: + - INSTALL_GCC6_FROM_PPA=1 + - COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug + - LIBCXX_BUILD=1 + - EXTRA_CXX_FLAGS="-stdlib=libc++" + - compiler: clang + dist: xenial + addons: + apt: + packages: + clang-3.8 + env: + - INSTALL_GCC6_FROM_PPA=1 + - COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Release + - LIBCXX_BUILD=1 + - EXTRA_CXX_FLAGS="-stdlib=libc++" + # Clang w/ 32bit libc++ + - compiler: clang + dist: xenial + addons: + apt: + packages: + - clang-3.8 + - g++-multilib + - libc6:i386 + env: + - INSTALL_GCC6_FROM_PPA=1 + - COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug + - LIBCXX_BUILD=1 + - BUILD_32_BITS=ON + - EXTRA_FLAGS="-m32" + - EXTRA_CXX_FLAGS="-stdlib=libc++" + # Clang w/ 32bit libc++ + - compiler: clang + dist: xenial + addons: + apt: + packages: + - clang-3.8 + - g++-multilib + - libc6:i386 + env: + - INSTALL_GCC6_FROM_PPA=1 + - COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Release + - LIBCXX_BUILD=1 + - 
BUILD_32_BITS=ON + - EXTRA_FLAGS="-m32" + - EXTRA_CXX_FLAGS="-stdlib=libc++" + # Clang w/ libc++, ASAN, UBSAN + - compiler: clang + dist: xenial + addons: + apt: + packages: + clang-3.8 + env: + - INSTALL_GCC6_FROM_PPA=1 + - COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug + - LIBCXX_BUILD=1 LIBCXX_SANITIZER="Undefined;Address" + - ENABLE_SANITIZER=1 + - EXTRA_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=undefined,address -fno-sanitize-recover=all" + - EXTRA_CXX_FLAGS="-stdlib=libc++" + - UBSAN_OPTIONS=print_stacktrace=1 + # Clang w/ libc++ and MSAN + - compiler: clang + dist: xenial + addons: + apt: + packages: + clang-3.8 + env: + - INSTALL_GCC6_FROM_PPA=1 + - COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug + - LIBCXX_BUILD=1 LIBCXX_SANITIZER=MemoryWithOrigins + - ENABLE_SANITIZER=1 + - EXTRA_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=memory -fsanitize-memory-track-origins" + - EXTRA_CXX_FLAGS="-stdlib=libc++" + # Clang w/ libc++ and MSAN + - compiler: clang + dist: xenial + addons: + apt: + packages: + clang-3.8 + env: + - INSTALL_GCC6_FROM_PPA=1 + - COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=RelWithDebInfo + - LIBCXX_BUILD=1 LIBCXX_SANITIZER=Thread + - ENABLE_SANITIZER=1 + - EXTRA_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all" + - EXTRA_CXX_FLAGS="-stdlib=libc++" + - os: osx + osx_image: xcode8.3 + compiler: clang + env: + - COMPILER=clang++ + - BUILD_TYPE=Release + - BUILD_32_BITS=ON + - EXTRA_FLAGS="-m32" + +before_script: + - if [ -n "${LIBCXX_BUILD}" ]; then + source .libcxx-setup.sh; + fi + - if [ -n "${ENABLE_SANITIZER}" ]; then + export EXTRA_OPTIONS="-DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF"; + else + export EXTRA_OPTIONS=""; + fi + - mkdir -p build && cd build + +before_install: + - if [ -z "$BUILD_32_BITS" ]; then + export BUILD_32_BITS=OFF && echo disabling 32 bit build; + fi + - if [ -n "${INSTALL_GCC6_FROM_PPA}" ]; then + sudo add-apt-repository -y "ppa:ubuntu-toolchain-r/test"; + sudo apt-get update --option Acquire::Retries=100 --option Acquire::http::Timeout="60"; + fi + +install: + - if [ -n "${INSTALL_GCC6_FROM_PPA}" ]; then + travis_wait sudo -E apt-get -yq --no-install-suggests --no-install-recommends install g++-6; + fi + - if [ "${TRAVIS_OS_NAME}" == "linux" -a "${BUILD_32_BITS}" == "OFF" ]; then + travis_wait sudo -E apt-get -y --no-install-suggests --no-install-recommends install llvm-3.9-tools; + sudo cp /usr/lib/llvm-3.9/bin/FileCheck /usr/local/bin/; + fi + - if [ "${BUILD_TYPE}" == "Coverage" -a "${TRAVIS_OS_NAME}" == "linux" ]; then + PATH=~/.local/bin:${PATH}; + pip install --user --upgrade pip; + travis_wait pip install --user cpp-coveralls; + fi + - if [ "${C_COMPILER}" == "gcc-7" -a "${TRAVIS_OS_NAME}" == "osx" ]; then + rm -f /usr/local/include/c++; + brew update; + travis_wait brew install gcc@7; + fi + - if [ "${TRAVIS_OS_NAME}" == "linux" ]; then + sudo apt-get update -qq; + sudo apt-get install -qq unzip cmake3; + wget https://github.com/bazelbuild/bazel/releases/download/3.2.0/bazel-3.2.0-installer-linux-x86_64.sh --output-document bazel-installer.sh; + travis_wait sudo bash bazel-installer.sh; + fi + - if [ "${TRAVIS_OS_NAME}" == "osx" ]; then + curl -L -o bazel-installer.sh https://github.com/bazelbuild/bazel/releases/download/3.2.0/bazel-3.2.0-installer-darwin-x86_64.sh; + travis_wait sudo bash bazel-installer.sh; + fi + +script: + - cmake -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${COMPILER} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DCMAKE_C_FLAGS="${EXTRA_FLAGS}" 
-DCMAKE_CXX_FLAGS="${EXTRA_FLAGS} ${EXTRA_CXX_FLAGS}" -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON -DBENCHMARK_BUILD_32_BITS=${BUILD_32_BITS} ${EXTRA_OPTIONS} .. + - make + - ctest -C ${BUILD_TYPE} --output-on-failure + - bazel test -c dbg --define google_benchmark.have_regex=posix --announce_rc --verbose_failures --test_output=errors --keep_going //test/... + +after_success: + - if [ "${BUILD_TYPE}" == "Coverage" -a "${TRAVIS_OS_NAME}" == "linux" ]; then + coveralls --include src --include include --gcov-options '\-lp' --root .. --build-root .; + fi diff --git a/third-party/benchmark/.ycm_extra_conf.py b/third-party/benchmark/.ycm_extra_conf.py new file mode 100644 index 000000000000..5649ddcc749f --- /dev/null +++ b/third-party/benchmark/.ycm_extra_conf.py @@ -0,0 +1,115 @@ +import os +import ycm_core + +# These are the compilation flags that will be used in case there's no +# compilation database set (by default, one is not set). +# CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR. +flags = [ +'-Wall', +'-Werror', +'-pedantic-errors', +'-std=c++0x', +'-fno-strict-aliasing', +'-O3', +'-DNDEBUG', +# ...and the same thing goes for the magic -x option which specifies the +# language that the files to be compiled are written in. This is mostly +# relevant for c++ headers. +# For a C project, you would set this to 'c' instead of 'c++'. +'-x', 'c++', +'-I', 'include', +'-isystem', '/usr/include', +'-isystem', '/usr/local/include', +] + + +# Set this to the absolute path to the folder (NOT the file!) containing the +# compile_commands.json file to use that instead of 'flags'. See here for +# more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html +# +# Most projects will NOT need to set this to anything; you can just change the +# 'flags' list of compilation flags. Notice that YCM itself uses that approach. +compilation_database_folder = '' + +if os.path.exists( compilation_database_folder ): + database = ycm_core.CompilationDatabase( compilation_database_folder ) +else: + database = None + +SOURCE_EXTENSIONS = [ '.cc' ] + +def DirectoryOfThisScript(): + return os.path.dirname( os.path.abspath( __file__ ) ) + + +def MakeRelativePathsInFlagsAbsolute( flags, working_directory ): + if not working_directory: + return list( flags ) + new_flags = [] + make_next_absolute = False + path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ] + for flag in flags: + new_flag = flag + + if make_next_absolute: + make_next_absolute = False + if not flag.startswith( '/' ): + new_flag = os.path.join( working_directory, flag ) + + for path_flag in path_flags: + if flag == path_flag: + make_next_absolute = True + break + + if flag.startswith( path_flag ): + path = flag[ len( path_flag ): ] + new_flag = path_flag + os.path.join( working_directory, path ) + break + + if new_flag: + new_flags.append( new_flag ) + return new_flags + + +def IsHeaderFile( filename ): + extension = os.path.splitext( filename )[ 1 ] + return extension in [ '.h', '.hxx', '.hpp', '.hh' ] + + +def GetCompilationInfoForFile( filename ): + # The compilation_commands.json file generated by CMake does not have entries + # for header files. So we do our best by asking the db for flags for a + # corresponding source file, if any. If one exists, the flags for that file + # should be good enough. 
+ if IsHeaderFile( filename ): + basename = os.path.splitext( filename )[ 0 ] + for extension in SOURCE_EXTENSIONS: + replacement_file = basename + extension + if os.path.exists( replacement_file ): + compilation_info = database.GetCompilationInfoForFile( + replacement_file ) + if compilation_info.compiler_flags_: + return compilation_info + return None + return database.GetCompilationInfoForFile( filename ) + + +def FlagsForFile( filename, **kwargs ): + if database: + # Bear in mind that compilation_info.compiler_flags_ does NOT return a + # python list, but a "list-like" StringVec object + compilation_info = GetCompilationInfoForFile( filename ) + if not compilation_info: + return None + + final_flags = MakeRelativePathsInFlagsAbsolute( + compilation_info.compiler_flags_, + compilation_info.compiler_working_dir_ ) + else: + relative_to = DirectoryOfThisScript() + final_flags = MakeRelativePathsInFlagsAbsolute( flags, relative_to ) + + return { + 'flags': final_flags, + 'do_cache': True + } diff --git a/third-party/benchmark/AUTHORS b/third-party/benchmark/AUTHORS index 838dd4f5bd5e..54770f35499c 100644 --- a/third-party/benchmark/AUTHORS +++ b/third-party/benchmark/AUTHORS @@ -21,6 +21,8 @@ David Coeurjolly Deniz Evrenci Dirac Research Dominik Czarnota +Dominik Korman +Donald Aingworth Eric Backus Eric Fiselier Eugene Zhuk diff --git a/third-party/benchmark/BUILD.bazel b/third-party/benchmark/BUILD.bazel index eb35b62730c6..904c691d643e 100644 --- a/third-party/benchmark/BUILD.bazel +++ b/third-party/benchmark/BUILD.bazel @@ -1,9 +1,17 @@ -load("@rules_cc//cc:defs.bzl", "cc_library") - licenses(["notice"]) +config_setting( + name = "qnx", + constraint_values = ["@platforms//os:qnx"], + values = { + "cpu": "x64_qnx", + }, + visibility = [":__subpackages__"], +) + config_setting( name = "windows", + constraint_values = ["@platforms//os:windows"], values = { "cpu": "x64_windows", }, diff --git a/third-party/benchmark/CMakeLists.txt b/third-party/benchmark/CMakeLists.txt index ef8dcdc68cfb..8af49406d052 100644 --- a/third-party/benchmark/CMakeLists.txt +++ b/third-party/benchmark/CMakeLists.txt @@ -13,18 +13,31 @@ foreach(p endif() endforeach() -project (benchmark VERSION 1.5.4 LANGUAGES CXX) +project (benchmark VERSION 1.6.0 LANGUAGES CXX) option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." ON) option(BENCHMARK_ENABLE_EXCEPTIONS "Enable the use of exceptions in the benchmark library." ON) option(BENCHMARK_ENABLE_LTO "Enable link time optimisation of the benchmark library." OFF) option(BENCHMARK_USE_LIBCXX "Build and test using libc++ as the standard library." OFF) +option(BENCHMARK_ENABLE_WERROR "Build Release candidates with -Werror." ON) +option(BENCHMARK_FORCE_WERROR "Build Release candidates with -Werror regardless of compiler issues." OFF) + +if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "PGI") + # PGC++ maybe reporting false positives. + set(BENCHMARK_ENABLE_WERROR OFF) +endif() +if(BENCHMARK_FORCE_WERROR) + set(BENCHMARK_ENABLE_WERROR ON) +endif(BENCHMARK_FORCE_WERROR) + if(NOT MSVC) option(BENCHMARK_BUILD_32_BITS "Build a 32 bit version of the library." OFF) else() set(BENCHMARK_BUILD_32_BITS OFF CACHE BOOL "Build a 32 bit version of the library - unsupported when using MSVC)" FORCE) endif() option(BENCHMARK_ENABLE_INSTALL "Enable installation of benchmark. (Projects embedding benchmark may want to turn this OFF.)" ON) +option(BENCHMARK_ENABLE_DOXYGEN "Build documentation with Doxygen." OFF) +option(BENCHMARK_INSTALL_DOCS "Enable installation of documentation." 
ON) # Allow unmet dependencies to be met using CMake's ExternalProject mechanics, which # may require downloading the source code. @@ -33,6 +46,7 @@ option(BENCHMARK_DOWNLOAD_DEPENDENCIES "Allow the downloading and in-tree buildi # This option can be used to disable building and running unit tests which depend on gtest # in cases where it is not possible to build or find a valid version of gtest. option(BENCHMARK_ENABLE_GTEST_TESTS "Enable building the unit tests which depend on gtest" ON) +option(BENCHMARK_USE_BUNDLED_GTEST "Use bundled GoogleTest. If disabled, the find_package(GTest) will be used." ON) option(BENCHMARK_ENABLE_LIBPFM "Enable performance counters provided by libpfm" OFF) @@ -112,6 +126,9 @@ string(SUBSTRING ${VERSION} 0 1 GENERIC_LIB_SOVERSION) include(CheckCXXCompilerFlag) include(AddCXXCompilerFlag) include(CXXFeatureCheck) +include(CheckLibraryExists) + +check_library_exists(rt shm_open "" HAVE_LIB_RT) if (BENCHMARK_BUILD_32_BITS) add_required_cxx_compiler_flag(-m32) @@ -160,9 +177,11 @@ else() add_cxx_compiler_flag(-Wall) add_cxx_compiler_flag(-Wextra) add_cxx_compiler_flag(-Wshadow) - add_cxx_compiler_flag(-Werror RELEASE) - add_cxx_compiler_flag(-Werror RELWITHDEBINFO) - add_cxx_compiler_flag(-Werror MINSIZEREL) + if(BENCHMARK_ENABLE_WERROR) + add_cxx_compiler_flag(-Werror RELEASE) + add_cxx_compiler_flag(-Werror RELWITHDEBINFO) + add_cxx_compiler_flag(-Werror MINSIZEREL) + endif() if (NOT BENCHMARK_ENABLE_TESTING) # Disable warning when compiling tests as gtest does not use 'override'. add_cxx_compiler_flag(-Wsuggest-override) @@ -181,9 +200,11 @@ else() add_cxx_compiler_flag(-wd1786) endif() # Disable deprecation warnings for release builds (when -Werror is enabled). - add_cxx_compiler_flag(-Wno-deprecated RELEASE) - add_cxx_compiler_flag(-Wno-deprecated RELWITHDEBINFO) - add_cxx_compiler_flag(-Wno-deprecated MINSIZEREL) + if(BENCHMARK_ENABLE_WERROR) + add_cxx_compiler_flag(-Wno-deprecated RELEASE) + add_cxx_compiler_flag(-Wno-deprecated RELWITHDEBINFO) + add_cxx_compiler_flag(-Wno-deprecated MINSIZEREL) + endif() if (NOT BENCHMARK_ENABLE_EXCEPTIONS) add_cxx_compiler_flag(-fno-exceptions) endif() @@ -307,7 +328,15 @@ if (BENCHMARK_ENABLE_TESTING) if (BENCHMARK_ENABLE_GTEST_TESTS AND NOT (TARGET gtest AND TARGET gtest_main AND TARGET gmock AND TARGET gmock_main)) - include(GoogleTest) + if (BENCHMARK_USE_BUNDLED_GTEST) + include(GoogleTest) + else() + find_package(GTest CONFIG REQUIRED) + add_library(gtest ALIAS GTest::gtest) + add_library(gtest_main ALIAS GTest::gtest_main) + add_library(gmock ALIAS GTest::gmock) + add_library(gmock_main ALIAS GTest::gmock_main) + endif() endif() add_subdirectory(test) endif() diff --git a/third-party/benchmark/CONTRIBUTORS b/third-party/benchmark/CONTRIBUTORS index 7489731de5a8..651fbeafe66a 100644 --- a/third-party/benchmark/CONTRIBUTORS +++ b/third-party/benchmark/CONTRIBUTORS @@ -38,6 +38,8 @@ David Coeurjolly Deniz Evrenci Dominic Hamon Dominik Czarnota +Dominik Korman +Donald Aingworth Eric Backus Eric Fiselier Eugene Zhuk diff --git a/third-party/benchmark/README.md b/third-party/benchmark/README.md index aa61cef1b162..7b81d960fc1d 100644 --- a/third-party/benchmark/README.md +++ b/third-party/benchmark/README.md @@ -27,14 +27,16 @@ BENCHMARK(BM_SomeFunction); BENCHMARK_MAIN(); ``` +## Getting Started + To get started, see [Requirements](#requirements) and [Installation](#installation). See [Usage](#usage) for a full example and the -[User Guide](#user-guide) for a more comprehensive feature overview. 
+[User Guide](docs/user_guide.md) for a more comprehensive feature overview. It may also help to read the [Google Test documentation](https://github.com/google/googletest/blob/master/docs/primer.md) as some of the structural aspects of the APIs are similar. -### Resources +## Resources [Discussion group](https://groups.google.com/d/forum/benchmark-discuss) @@ -57,27 +59,25 @@ The following minimum versions are required to build the library: * Visual Studio 14 2015 * Intel 2015 Update 1 -See [Platform-Specific Build Instructions](#platform-specific-build-instructions). +See [Platform-Specific Build Instructions](docs/platform_specific_build_instructions.md). ## Installation This describes the installation process using cmake. As pre-requisites, you'll need git and cmake installed. -_See [dependencies.md](dependencies.md) for more details regarding supported +_See [dependencies.md](docs/dependencies.md) for more details regarding supported versions of build tools._ ```bash # Check out the library. $ git clone https://github.com/google/benchmark.git -# Benchmark requires Google Test as a dependency. Add the source tree as a subdirectory. -$ git clone https://github.com/google/googletest.git benchmark/googletest # Go to the library root directory $ cd benchmark # Make a build directory to place the build output. $ cmake -E make_directory "build" -# Generate build system files with cmake. -$ cmake -E chdir "build" cmake -DCMAKE_BUILD_TYPE=Release ../ +# Generate build system files with cmake, and download any dependencies. +$ cmake -E chdir "build" cmake -DBENCHMARK_DOWNLOAD_DEPENDENCIES=on -DCMAKE_BUILD_TYPE=Release ../ # or, starting with CMake 3.13, use a simpler form: # cmake -DCMAKE_BUILD_TYPE=Release -S . -B "build" # Build the library. @@ -111,10 +111,10 @@ sudo cmake --build "build" --config Release --target install Note that Google Benchmark requires Google Test to build and run the tests. This dependency can be provided two ways: -* Checkout the Google Test sources into `benchmark/googletest` as above. +* Checkout the Google Test sources into `benchmark/googletest`. * Otherwise, if `-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON` is specified during - configuration, the library will automatically download and build any required - dependencies. + configuration as above, the library will automatically download and build + any required dependencies. If you do not wish to build and run the tests, add `-DBENCHMARK_ENABLE_GTEST_TESTS=OFF` to `CMAKE_ARGS`. @@ -193,7 +193,7 @@ Alternatively, link against the `benchmark_main` library and remove `BENCHMARK_MAIN();` above to get the same behavior. The compiled executable will run all benchmarks by default. Pass the `--help` -flag for option information or see the guide below. +flag for option information or see the [User Guide](docs/user_guide.md). ### Usage with CMake @@ -214,1165 +214,3 @@ Either way, link to the library as follows. ```cmake target_link_libraries(MyTarget benchmark::benchmark) ``` - -## Platform Specific Build Instructions - -### Building with GCC - -When the library is built using GCC it is necessary to link with the pthread -library due to how GCC implements `std::thread`. Failing to link to pthread will -lead to runtime exceptions (unless you're using libc++), not linker errors. See -[issue #67](https://github.com/google/benchmark/issues/67) for more details. You -can link to pthread by adding `-pthread` to your linker command. 
Note, you can -also use `-lpthread`, but there are potential issues with ordering of command -line parameters if you use that. - -### Building with Visual Studio 2015 or 2017 - -The `shlwapi` library (`-lshlwapi`) is required to support a call to `CPUInfo` which reads the registry. Either add `shlwapi.lib` under `[ Configuration Properties > Linker > Input ]`, or use the following: - -``` -// Alternatively, can add libraries using linker options. -#ifdef _WIN32 -#pragma comment ( lib, "Shlwapi.lib" ) -#ifdef _DEBUG -#pragma comment ( lib, "benchmarkd.lib" ) -#else -#pragma comment ( lib, "benchmark.lib" ) -#endif -#endif -``` - -Can also use the graphical version of CMake: -* Open `CMake GUI`. -* Under `Where to build the binaries`, same path as source plus `build`. -* Under `CMAKE_INSTALL_PREFIX`, same path as source plus `install`. -* Click `Configure`, `Generate`, `Open Project`. -* If build fails, try deleting entire directory and starting again, or unticking options to build less. - -### Building with Intel 2015 Update 1 or Intel System Studio Update 4 - -See instructions for building with Visual Studio. Once built, right click on the solution and change the build to Intel. - -### Building on Solaris - -If you're running benchmarks on solaris, you'll want the kstat library linked in -too (`-lkstat`). - -## User Guide - -### Command Line - -[Output Formats](#output-formats) - -[Output Files](#output-files) - -[Running Benchmarks](#running-benchmarks) - -[Running a Subset of Benchmarks](#running-a-subset-of-benchmarks) - -[Result Comparison](#result-comparison) - -[Extra Context](#extra-context) - -### Library - -[Runtime and Reporting Considerations](#runtime-and-reporting-considerations) - -[Passing Arguments](#passing-arguments) - -[Custom Benchmark Name](#custom-benchmark-name) - -[Calculating Asymptotic Complexity](#asymptotic-complexity) - -[Templated Benchmarks](#templated-benchmarks) - -[Fixtures](#fixtures) - -[Custom Counters](#custom-counters) - -[Multithreaded Benchmarks](#multithreaded-benchmarks) - -[CPU Timers](#cpu-timers) - -[Manual Timing](#manual-timing) - -[Setting the Time Unit](#setting-the-time-unit) - -[Random Interleaving](docs/random_interleaving.md) - -[User-Requested Performance Counters](docs/perf_counters.md) - -[Preventing Optimization](#preventing-optimization) - -[Reporting Statistics](#reporting-statistics) - -[Custom Statistics](#custom-statistics) - -[Using RegisterBenchmark](#using-register-benchmark) - -[Exiting with an Error](#exiting-with-an-error) - -[A Faster KeepRunning Loop](#a-faster-keep-running-loop) - -[Disabling CPU Frequency Scaling](#disabling-cpu-frequency-scaling) - - - - -### Output Formats - -The library supports multiple output formats. Use the -`--benchmark_format=` flag (or set the -`BENCHMARK_FORMAT=` environment variable) to set -the format type. `console` is the default format. - -The Console format is intended to be a human readable format. By default -the format generates color output. Context is output on stderr and the -tabular data on stdout. Example tabular output looks like: - -``` -Benchmark Time(ns) CPU(ns) Iterations ----------------------------------------------------------------------- -BM_SetInsert/1024/1 28928 29349 23853 133.097kB/s 33.2742k items/s -BM_SetInsert/1024/8 32065 32913 21375 949.487kB/s 237.372k items/s -BM_SetInsert/1024/10 33157 33648 21431 1.13369MB/s 290.225k items/s -``` - -The JSON format outputs human readable json split into two top level attributes. 
-The `context` attribute contains information about the run in general, including -information about the CPU and the date. -The `benchmarks` attribute contains a list of every benchmark run. Example json -output looks like: - -```json -{ - "context": { - "date": "2015/03/17-18:40:25", - "num_cpus": 40, - "mhz_per_cpu": 2801, - "cpu_scaling_enabled": false, - "build_type": "debug" - }, - "benchmarks": [ - { - "name": "BM_SetInsert/1024/1", - "iterations": 94877, - "real_time": 29275, - "cpu_time": 29836, - "bytes_per_second": 134066, - "items_per_second": 33516 - }, - { - "name": "BM_SetInsert/1024/8", - "iterations": 21609, - "real_time": 32317, - "cpu_time": 32429, - "bytes_per_second": 986770, - "items_per_second": 246693 - }, - { - "name": "BM_SetInsert/1024/10", - "iterations": 21393, - "real_time": 32724, - "cpu_time": 33355, - "bytes_per_second": 1199226, - "items_per_second": 299807 - } - ] -} -``` - -The CSV format outputs comma-separated values. The `context` is output on stderr -and the CSV itself on stdout. Example CSV output looks like: - -``` -name,iterations,real_time,cpu_time,bytes_per_second,items_per_second,label -"BM_SetInsert/1024/1",65465,17890.7,8407.45,475768,118942, -"BM_SetInsert/1024/8",116606,18810.1,9766.64,3.27646e+06,819115, -"BM_SetInsert/1024/10",106365,17238.4,8421.53,4.74973e+06,1.18743e+06, -``` - - - -### Output Files - -Write benchmark results to a file with the `--benchmark_out=` option -(or set `BENCHMARK_OUT`). Specify the output format with -`--benchmark_out_format={json|console|csv}` (or set -`BENCHMARK_OUT_FORMAT={json|console|csv}`). Note that the 'csv' reporter is -deprecated and the saved `.csv` file -[is not parsable](https://github.com/google/benchmark/issues/794) by csv -parsers. - -Specifying `--benchmark_out` does not suppress the console output. - - - -### Running Benchmarks - -Benchmarks are executed by running the produced binaries. Benchmarks binaries, -by default, accept options that may be specified either through their command -line interface or by setting environment variables before execution. For every -`--option_flag=` CLI switch, a corresponding environment variable -`OPTION_FLAG=` exist and is used as default if set (CLI switches always - prevails). A complete list of CLI options is available running benchmarks - with the `--help` switch. - - - -### Running a Subset of Benchmarks - -The `--benchmark_filter=` option (or `BENCHMARK_FILTER=` -environment variable) can be used to only run the benchmarks that match -the specified ``. For example: - -```bash -$ ./run_benchmarks.x --benchmark_filter=BM_memcpy/32 -Run on (1 X 2300 MHz CPU ) -2016-06-25 19:34:24 -Benchmark Time CPU Iterations ----------------------------------------------------- -BM_memcpy/32 11 ns 11 ns 79545455 -BM_memcpy/32k 2181 ns 2185 ns 324074 -BM_memcpy/32 12 ns 12 ns 54687500 -BM_memcpy/32k 1834 ns 1837 ns 357143 -``` - - - -### Result comparison - -It is possible to compare the benchmarking results. -See [Additional Tooling Documentation](docs/tools.md) - - - -### Extra Context - -Sometimes it's useful to add extra context to the content printed before the -results. By default this section includes information about the CPU on which -the benchmarks are running. 
If you do want to add more context, you can use -the `benchmark_context` command line flag: - -```bash -$ ./run_benchmarks --benchmark_context=pwd=`pwd` -Run on (1 x 2300 MHz CPU) -pwd: /home/user/benchmark/ -Benchmark Time CPU Iterations ----------------------------------------------------- -BM_memcpy/32 11 ns 11 ns 79545455 -BM_memcpy/32k 2181 ns 2185 ns 324074 -``` - -You can get the same effect with the API: - -```c++ - benchmark::AddCustomContext("foo", "bar"); -``` - -Note that attempts to add a second value with the same key will fail with an -error message. - - - -### Runtime and Reporting Considerations - -When the benchmark binary is executed, each benchmark function is run serially. -The number of iterations to run is determined dynamically by running the -benchmark a few times and measuring the time taken and ensuring that the -ultimate result will be statistically stable. As such, faster benchmark -functions will be run for more iterations than slower benchmark functions, and -the number of iterations is thus reported. - -In all cases, the number of iterations for which the benchmark is run is -governed by the amount of time the benchmark takes. Concretely, the number of -iterations is at least one, not more than 1e9, until CPU time is greater than -the minimum time, or the wallclock time is 5x minimum time. The minimum time is -set per benchmark by calling `MinTime` on the registered benchmark object. - -Average timings are then reported over the iterations run. If multiple -repetitions are requested using the `--benchmark_repetitions` command-line -option, or at registration time, the benchmark function will be run several -times and statistical results across these repetitions will also be reported. - -As well as the per-benchmark entries, a preamble in the report will include -information about the machine on which the benchmarks are run. - - - -### Passing Arguments - -Sometimes a family of benchmarks can be implemented with just one routine that -takes an extra argument to specify which one of the family of benchmarks to -run. For example, the following code defines a family of benchmarks for -measuring the speed of `memcpy()` calls of different lengths: - -```c++ -static void BM_memcpy(benchmark::State& state) { - char* src = new char[state.range(0)]; - char* dst = new char[state.range(0)]; - memset(src, 'x', state.range(0)); - for (auto _ : state) - memcpy(dst, src, state.range(0)); - state.SetBytesProcessed(int64_t(state.iterations()) * - int64_t(state.range(0))); - delete[] src; - delete[] dst; -} -BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10); -``` - -The preceding code is quite repetitive, and can be replaced with the following -short-hand. The following invocation will pick a few appropriate arguments in -the specified range and will generate a benchmark for each such argument. - -```c++ -BENCHMARK(BM_memcpy)->Range(8, 8<<10); -``` - -By default the arguments in the range are generated in multiples of eight and -the command above selects [ 8, 64, 512, 4k, 8k ]. In the following code the -range multiplier is changed to multiples of two. - -```c++ -BENCHMARK(BM_memcpy)->RangeMultiplier(2)->Range(8, 8<<10); -``` - -Now arguments generated are [ 8, 16, 32, 64, 128, 256, 512, 1024, 2k, 4k, 8k ]. - -The preceding code shows a method of defining a sparse range. The following -example shows a method of defining a dense range. It is then used to benchmark -the performance of `std::vector` initialization for uniformly increasing sizes. 
- -```c++ -static void BM_DenseRange(benchmark::State& state) { - for(auto _ : state) { - std::vector v(state.range(0), state.range(0)); - benchmark::DoNotOptimize(v.data()); - benchmark::ClobberMemory(); - } -} -BENCHMARK(BM_DenseRange)->DenseRange(0, 1024, 128); -``` - -Now arguments generated are [ 0, 128, 256, 384, 512, 640, 768, 896, 1024 ]. - -You might have a benchmark that depends on two or more inputs. For example, the -following code defines a family of benchmarks for measuring the speed of set -insertion. - -```c++ -static void BM_SetInsert(benchmark::State& state) { - std::set data; - for (auto _ : state) { - state.PauseTiming(); - data = ConstructRandomSet(state.range(0)); - state.ResumeTiming(); - for (int j = 0; j < state.range(1); ++j) - data.insert(RandomNumber()); - } -} -BENCHMARK(BM_SetInsert) - ->Args({1<<10, 128}) - ->Args({2<<10, 128}) - ->Args({4<<10, 128}) - ->Args({8<<10, 128}) - ->Args({1<<10, 512}) - ->Args({2<<10, 512}) - ->Args({4<<10, 512}) - ->Args({8<<10, 512}); -``` - -The preceding code is quite repetitive, and can be replaced with the following -short-hand. The following macro will pick a few appropriate arguments in the -product of the two specified ranges and will generate a benchmark for each such -pair. - -```c++ -BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}}); -``` - -Some benchmarks may require specific argument values that cannot be expressed -with `Ranges`. In this case, `ArgsProduct` offers the ability to generate a -benchmark input for each combination in the product of the supplied vectors. - -```c++ -BENCHMARK(BM_SetInsert) - ->ArgsProduct({{1<<10, 3<<10, 8<<10}, {20, 40, 60, 80}}) -// would generate the same benchmark arguments as -BENCHMARK(BM_SetInsert) - ->Args({1<<10, 20}) - ->Args({3<<10, 20}) - ->Args({8<<10, 20}) - ->Args({3<<10, 40}) - ->Args({8<<10, 40}) - ->Args({1<<10, 40}) - ->Args({1<<10, 60}) - ->Args({3<<10, 60}) - ->Args({8<<10, 60}) - ->Args({1<<10, 80}) - ->Args({3<<10, 80}) - ->Args({8<<10, 80}); -``` - -For more complex patterns of inputs, passing a custom function to `Apply` allows -programmatic specification of an arbitrary set of arguments on which to run the -benchmark. The following example enumerates a dense range on one parameter, -and a sparse range on the second. - -```c++ -static void CustomArguments(benchmark::internal::Benchmark* b) { - for (int i = 0; i <= 10; ++i) - for (int j = 32; j <= 1024*1024; j *= 8) - b->Args({i, j}); -} -BENCHMARK(BM_SetInsert)->Apply(CustomArguments); -``` - -#### Passing Arbitrary Arguments to a Benchmark - -In C++11 it is possible to define a benchmark that takes an arbitrary number -of extra arguments. The `BENCHMARK_CAPTURE(func, test_case_name, ...args)` -macro creates a benchmark that invokes `func` with the `benchmark::State` as -the first argument followed by the specified `args...`. -The `test_case_name` is appended to the name of the benchmark and -should describe the values passed. - -```c++ -template -void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) { - [...] -} -// Registers a benchmark named "BM_takes_args/int_string_test" that passes -// the specified values to `extra_args`. -BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc")); -``` - -Note that elements of `...args` may refer to global variables. Users should -avoid modifying global state inside of a benchmark. - - - -### Calculating Asymptotic Complexity (Big O) - -Asymptotic complexity might be calculated for a family of benchmarks. 
The -following code will calculate the coefficient for the high-order term in the -running time and the normalized root-mean square error of string comparison. - -```c++ -static void BM_StringCompare(benchmark::State& state) { - std::string s1(state.range(0), '-'); - std::string s2(state.range(0), '-'); - for (auto _ : state) { - benchmark::DoNotOptimize(s1.compare(s2)); - } - state.SetComplexityN(state.range(0)); -} -BENCHMARK(BM_StringCompare) - ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(benchmark::oN); -``` - -As shown in the following invocation, asymptotic complexity might also be -calculated automatically. - -```c++ -BENCHMARK(BM_StringCompare) - ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(); -``` - -The following code will specify asymptotic complexity with a lambda function, -that might be used to customize high-order term calculation. - -```c++ -BENCHMARK(BM_StringCompare)->RangeMultiplier(2) - ->Range(1<<10, 1<<18)->Complexity([](benchmark::IterationCount n)->double{return n; }); -``` - - - -### Custom Benchmark Name - -You can change the benchmark's name as follows: - -```c++ -BENCHMARK(BM_memcpy)->Name("memcpy")->RangeMultiplier(2)->Range(8, 8<<10); -``` - -The invocation will execute the benchmark as before using `BM_memcpy` but changes -the prefix in the report to `memcpy`. - - - -### Templated Benchmarks - -This example produces and consumes messages of size `sizeof(v)` `range_x` -times. It also outputs throughput in the absence of multiprogramming. - -```c++ -template void BM_Sequential(benchmark::State& state) { - Q q; - typename Q::value_type v; - for (auto _ : state) { - for (int i = state.range(0); i--; ) - q.push(v); - for (int e = state.range(0); e--; ) - q.Wait(&v); - } - // actually messages, not bytes: - state.SetBytesProcessed( - static_cast(state.iterations())*state.range(0)); -} -BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue)->Range(1<<0, 1<<10); -``` - -Three macros are provided for adding benchmark templates. - -```c++ -#ifdef BENCHMARK_HAS_CXX11 -#define BENCHMARK_TEMPLATE(func, ...) // Takes any number of parameters. -#else // C++ < C++11 -#define BENCHMARK_TEMPLATE(func, arg1) -#endif -#define BENCHMARK_TEMPLATE1(func, arg1) -#define BENCHMARK_TEMPLATE2(func, arg1, arg2) -``` - - - -### Fixtures - -Fixture tests are created by first defining a type that derives from -`::benchmark::Fixture` and then creating/registering the tests using the -following macros: - -* `BENCHMARK_F(ClassName, Method)` -* `BENCHMARK_DEFINE_F(ClassName, Method)` -* `BENCHMARK_REGISTER_F(ClassName, Method)` - -For Example: - -```c++ -class MyFixture : public benchmark::Fixture { -public: - void SetUp(const ::benchmark::State& state) { - } - - void TearDown(const ::benchmark::State& state) { - } -}; - -BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) { - for (auto _ : st) { - ... - } -} - -BENCHMARK_DEFINE_F(MyFixture, BarTest)(benchmark::State& st) { - for (auto _ : st) { - ... - } -} -/* BarTest is NOT registered */ -BENCHMARK_REGISTER_F(MyFixture, BarTest)->Threads(2); -/* BarTest is now registered */ -``` - -#### Templated Fixtures - -Also you can create templated fixture by using the following macros: - -* `BENCHMARK_TEMPLATE_F(ClassName, Method, ...)` -* `BENCHMARK_TEMPLATE_DEFINE_F(ClassName, Method, ...)` - -For example: - -```c++ -template -class MyFixture : public benchmark::Fixture {}; - -BENCHMARK_TEMPLATE_F(MyFixture, IntTest, int)(benchmark::State& st) { - for (auto _ : st) { - ... 
- } -} - -BENCHMARK_TEMPLATE_DEFINE_F(MyFixture, DoubleTest, double)(benchmark::State& st) { - for (auto _ : st) { - ... - } -} - -BENCHMARK_REGISTER_F(MyFixture, DoubleTest)->Threads(2); -``` - - - -### Custom Counters - -You can add your own counters with user-defined names. The example below -will add columns "Foo", "Bar" and "Baz" in its output: - -```c++ -static void UserCountersExample1(benchmark::State& state) { - double numFoos = 0, numBars = 0, numBazs = 0; - for (auto _ : state) { - // ... count Foo,Bar,Baz events - } - state.counters["Foo"] = numFoos; - state.counters["Bar"] = numBars; - state.counters["Baz"] = numBazs; -} -``` - -The `state.counters` object is a `std::map` with `std::string` keys -and `Counter` values. The latter is a `double`-like class, via an implicit -conversion to `double&`. Thus you can use all of the standard arithmetic -assignment operators (`=,+=,-=,*=,/=`) to change the value of each counter. - -In multithreaded benchmarks, each counter is set on the calling thread only. -When the benchmark finishes, the counters from each thread will be summed; -the resulting sum is the value which will be shown for the benchmark. - -The `Counter` constructor accepts three parameters: the value as a `double` -; a bit flag which allows you to show counters as rates, and/or as per-thread -iteration, and/or as per-thread averages, and/or iteration invariants, -and/or finally inverting the result; and a flag specifying the 'unit' - i.e. -is 1k a 1000 (default, `benchmark::Counter::OneK::kIs1000`), or 1024 -(`benchmark::Counter::OneK::kIs1024`)? - -```c++ - // sets a simple counter - state.counters["Foo"] = numFoos; - - // Set the counter as a rate. It will be presented divided - // by the duration of the benchmark. - // Meaning: per one second, how many 'foo's are processed? - state.counters["FooRate"] = Counter(numFoos, benchmark::Counter::kIsRate); - - // Set the counter as a rate. It will be presented divided - // by the duration of the benchmark, and the result inverted. - // Meaning: how many seconds it takes to process one 'foo'? - state.counters["FooInvRate"] = Counter(numFoos, benchmark::Counter::kIsRate | benchmark::Counter::kInvert); - - // Set the counter as a thread-average quantity. It will - // be presented divided by the number of threads. - state.counters["FooAvg"] = Counter(numFoos, benchmark::Counter::kAvgThreads); - - // There's also a combined flag: - state.counters["FooAvgRate"] = Counter(numFoos,benchmark::Counter::kAvgThreadsRate); - - // This says that we process with the rate of state.range(0) bytes every iteration: - state.counters["BytesProcessed"] = Counter(state.range(0), benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::OneK::kIs1024); -``` - -When you're compiling in C++11 mode or later you can use `insert()` with -`std::initializer_list`: - -```c++ - // With C++11, this can be done: - state.counters.insert({{"Foo", numFoos}, {"Bar", numBars}, {"Baz", numBazs}}); - // ... instead of: - state.counters["Foo"] = numFoos; - state.counters["Bar"] = numBars; - state.counters["Baz"] = numBazs; -``` - -#### Counter Reporting - -When using the console reporter, by default, user counters are printed at -the end after the table, the same way as ``bytes_processed`` and -``items_processed``. This is best for cases in which there are few counters, -or where there are only a couple of lines per benchmark. 
Here's an example of -the default output: - -``` ------------------------------------------------------------------------------- -Benchmark Time CPU Iterations UserCounters... ------------------------------------------------------------------------------- -BM_UserCounter/threads:8 2248 ns 10277 ns 68808 Bar=16 Bat=40 Baz=24 Foo=8 -BM_UserCounter/threads:1 9797 ns 9788 ns 71523 Bar=2 Bat=5 Baz=3 Foo=1024m -BM_UserCounter/threads:2 4924 ns 9842 ns 71036 Bar=4 Bat=10 Baz=6 Foo=2 -BM_UserCounter/threads:4 2589 ns 10284 ns 68012 Bar=8 Bat=20 Baz=12 Foo=4 -BM_UserCounter/threads:8 2212 ns 10287 ns 68040 Bar=16 Bat=40 Baz=24 Foo=8 -BM_UserCounter/threads:16 1782 ns 10278 ns 68144 Bar=32 Bat=80 Baz=48 Foo=16 -BM_UserCounter/threads:32 1291 ns 10296 ns 68256 Bar=64 Bat=160 Baz=96 Foo=32 -BM_UserCounter/threads:4 2615 ns 10307 ns 68040 Bar=8 Bat=20 Baz=12 Foo=4 -BM_Factorial 26 ns 26 ns 26608979 40320 -BM_Factorial/real_time 26 ns 26 ns 26587936 40320 -BM_CalculatePiRange/1 16 ns 16 ns 45704255 0 -BM_CalculatePiRange/8 73 ns 73 ns 9520927 3.28374 -BM_CalculatePiRange/64 609 ns 609 ns 1140647 3.15746 -BM_CalculatePiRange/512 4900 ns 4901 ns 142696 3.14355 -``` - -If this doesn't suit you, you can print each counter as a table column by -passing the flag `--benchmark_counters_tabular=true` to the benchmark -application. This is best for cases in which there are a lot of counters, or -a lot of lines per individual benchmark. Note that this will trigger a -reprinting of the table header any time the counter set changes between -individual benchmarks. Here's an example of corresponding output when -`--benchmark_counters_tabular=true` is passed: - -``` ---------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations Bar Bat Baz Foo ---------------------------------------------------------------------------------------- -BM_UserCounter/threads:8 2198 ns 9953 ns 70688 16 40 24 8 -BM_UserCounter/threads:1 9504 ns 9504 ns 73787 2 5 3 1 -BM_UserCounter/threads:2 4775 ns 9550 ns 72606 4 10 6 2 -BM_UserCounter/threads:4 2508 ns 9951 ns 70332 8 20 12 4 -BM_UserCounter/threads:8 2055 ns 9933 ns 70344 16 40 24 8 -BM_UserCounter/threads:16 1610 ns 9946 ns 70720 32 80 48 16 -BM_UserCounter/threads:32 1192 ns 9948 ns 70496 64 160 96 32 -BM_UserCounter/threads:4 2506 ns 9949 ns 70332 8 20 12 4 --------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------- -BM_Factorial 26 ns 26 ns 26392245 40320 -BM_Factorial/real_time 26 ns 26 ns 26494107 40320 -BM_CalculatePiRange/1 15 ns 15 ns 45571597 0 -BM_CalculatePiRange/8 74 ns 74 ns 9450212 3.28374 -BM_CalculatePiRange/64 595 ns 595 ns 1173901 3.15746 -BM_CalculatePiRange/512 4752 ns 4752 ns 147380 3.14355 -BM_CalculatePiRange/4k 37970 ns 37972 ns 18453 3.14184 -BM_CalculatePiRange/32k 303733 ns 303744 ns 2305 3.14162 -BM_CalculatePiRange/256k 2434095 ns 2434186 ns 288 3.1416 -BM_CalculatePiRange/1024k 9721140 ns 9721413 ns 71 3.14159 -BM_CalculatePi/threads:8 2255 ns 9943 ns 70936 -``` - -Note above the additional header printed when the benchmark changes from -``BM_UserCounter`` to ``BM_Factorial``. This is because ``BM_Factorial`` does -not have the same counter set as ``BM_UserCounter``. 
- - - -### Multithreaded Benchmarks - -In a multithreaded test (benchmark invoked by multiple threads simultaneously), -it is guaranteed that none of the threads will start until all have reached -the start of the benchmark loop, and all will have finished before any thread -exits the benchmark loop. (This behavior is also provided by the `KeepRunning()` -API) As such, any global setup or teardown can be wrapped in a check against the thread -index: - -```c++ -static void BM_MultiThreaded(benchmark::State& state) { - if (state.thread_index == 0) { - // Setup code here. - } - for (auto _ : state) { - // Run the test as normal. - } - if (state.thread_index == 0) { - // Teardown code here. - } -} -BENCHMARK(BM_MultiThreaded)->Threads(2); -``` - -If the benchmarked code itself uses threads and you want to compare it to -single-threaded code, you may want to use real-time ("wallclock") measurements -for latency comparisons: - -```c++ -BENCHMARK(BM_test)->Range(8, 8<<10)->UseRealTime(); -``` - -Without `UseRealTime`, CPU time is used by default. - - - -### CPU Timers - -By default, the CPU timer only measures the time spent by the main thread. -If the benchmark itself uses threads internally, this measurement may not -be what you are looking for. Instead, there is a way to measure the total -CPU usage of the process, by all the threads. - -```c++ -void callee(int i); - -static void MyMain(int size) { -#pragma omp parallel for - for(int i = 0; i < size; i++) - callee(i); -} - -static void BM_OpenMP(benchmark::State& state) { - for (auto _ : state) - MyMain(state.range(0)); -} - -// Measure the time spent by the main thread, use it to decide for how long to -// run the benchmark loop. Depending on the internal implementation detail may -// measure to anywhere from near-zero (the overhead spent before/after work -// handoff to worker thread[s]) to the whole single-thread time. -BENCHMARK(BM_OpenMP)->Range(8, 8<<10); - -// Measure the user-visible time, the wall clock (literally, the time that -// has passed on the clock on the wall), use it to decide for how long to -// run the benchmark loop. This will always be meaningful, an will match the -// time spent by the main thread in single-threaded case, in general decreasing -// with the number of internal threads doing the work. -BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->UseRealTime(); - -// Measure the total CPU consumption, use it to decide for how long to -// run the benchmark loop. This will always measure to no less than the -// time spent by the main thread in single-threaded case. -BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime(); - -// A mixture of the last two. Measure the total CPU consumption, but use the -// wall clock to decide for how long to run the benchmark loop. -BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime()->UseRealTime(); -``` - -#### Controlling Timers - -Normally, the entire duration of the work loop (`for (auto _ : state) {}`) -is measured. But sometimes, it is necessary to do some work inside of -that loop, every iteration, but without counting that time to the benchmark time. -That is possible, although it is not recommended, since it has high overhead. - -```c++ -static void BM_SetInsert_With_Timer_Control(benchmark::State& state) { - std::set data; - for (auto _ : state) { - state.PauseTiming(); // Stop timers. They will not count until they are resumed. - data = ConstructRandomSet(state.range(0)); // Do something that should not be measured - state.ResumeTiming(); // And resume timers. 
They are now counting again. - // The rest will be measured. - for (int j = 0; j < state.range(1); ++j) - data.insert(RandomNumber()); - } -} -BENCHMARK(BM_SetInsert_With_Timer_Control)->Ranges({{1<<10, 8<<10}, {128, 512}}); -``` - - - -### Manual Timing - -For benchmarking something for which neither CPU time nor real-time are -correct or accurate enough, completely manual timing is supported using -the `UseManualTime` function. - -When `UseManualTime` is used, the benchmarked code must call -`SetIterationTime` once per iteration of the benchmark loop to -report the manually measured time. - -An example use case for this is benchmarking GPU execution (e.g. OpenCL -or CUDA kernels, OpenGL or Vulkan or Direct3D draw calls), which cannot -be accurately measured using CPU time or real-time. Instead, they can be -measured accurately using a dedicated API, and these measurement results -can be reported back with `SetIterationTime`. - -```c++ -static void BM_ManualTiming(benchmark::State& state) { - int microseconds = state.range(0); - std::chrono::duration sleep_duration { - static_cast(microseconds) - }; - - for (auto _ : state) { - auto start = std::chrono::high_resolution_clock::now(); - // Simulate some useful workload with a sleep - std::this_thread::sleep_for(sleep_duration); - auto end = std::chrono::high_resolution_clock::now(); - - auto elapsed_seconds = - std::chrono::duration_cast>( - end - start); - - state.SetIterationTime(elapsed_seconds.count()); - } -} -BENCHMARK(BM_ManualTiming)->Range(1, 1<<17)->UseManualTime(); -``` - - - -### Setting the Time Unit - -If a benchmark runs a few milliseconds it may be hard to visually compare the -measured times, since the output data is given in nanoseconds per default. In -order to manually set the time unit, you can specify it manually: - -```c++ -BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); -``` - - - -### Preventing Optimization - -To prevent a value or expression from being optimized away by the compiler -the `benchmark::DoNotOptimize(...)` and `benchmark::ClobberMemory()` -functions can be used. - -```c++ -static void BM_test(benchmark::State& state) { - for (auto _ : state) { - int x = 0; - for (int i=0; i < 64; ++i) { - benchmark::DoNotOptimize(x += i); - } - } -} -``` - -`DoNotOptimize()` forces the *result* of `` to be stored in either -memory or a register. For GNU based compilers it acts as read/write barrier -for global memory. More specifically it forces the compiler to flush pending -writes to memory and reload any other values as necessary. - -Note that `DoNotOptimize()` does not prevent optimizations on `` -in any way. `` may even be removed entirely when the result is already -known. For example: - -```c++ - /* Example 1: `` is removed entirely. */ - int foo(int x) { return x + 42; } - while (...) DoNotOptimize(foo(0)); // Optimized to DoNotOptimize(42); - - /* Example 2: Result of '' is only reused */ - int bar(int) __attribute__((const)); - while (...) DoNotOptimize(bar(0)); // Optimized to: - // int __result__ = bar(0); - // while (...) DoNotOptimize(__result__); -``` - -The second tool for preventing optimizations is `ClobberMemory()`. In essence -`ClobberMemory()` forces the compiler to perform all pending writes to global -memory. Memory managed by block scope objects must be "escaped" using -`DoNotOptimize(...)` before it can be clobbered. In the below example -`ClobberMemory()` prevents the call to `v.push_back(42)` from being optimized -away. 
- -```c++ -static void BM_vector_push_back(benchmark::State& state) { - for (auto _ : state) { - std::vector v; - v.reserve(1); - benchmark::DoNotOptimize(v.data()); // Allow v.data() to be clobbered. - v.push_back(42); - benchmark::ClobberMemory(); // Force 42 to be written to memory. - } -} -``` - -Note that `ClobberMemory()` is only available for GNU or MSVC based compilers. - - - -### Statistics: Reporting the Mean, Median and Standard Deviation of Repeated Benchmarks - -By default each benchmark is run once and that single result is reported. -However benchmarks are often noisy and a single result may not be representative -of the overall behavior. For this reason it's possible to repeatedly rerun the -benchmark. - -The number of runs of each benchmark is specified globally by the -`--benchmark_repetitions` flag or on a per benchmark basis by calling -`Repetitions` on the registered benchmark object. When a benchmark is run more -than once the mean, median and standard deviation of the runs will be reported. - -Additionally the `--benchmark_report_aggregates_only={true|false}`, -`--benchmark_display_aggregates_only={true|false}` flags or -`ReportAggregatesOnly(bool)`, `DisplayAggregatesOnly(bool)` functions can be -used to change how repeated tests are reported. By default the result of each -repeated run is reported. When `report aggregates only` option is `true`, -only the aggregates (i.e. mean, median and standard deviation, maybe complexity -measurements if they were requested) of the runs is reported, to both the -reporters - standard output (console), and the file. -However when only the `display aggregates only` option is `true`, -only the aggregates are displayed in the standard output, while the file -output still contains everything. -Calling `ReportAggregatesOnly(bool)` / `DisplayAggregatesOnly(bool)` on a -registered benchmark object overrides the value of the appropriate flag for that -benchmark. - - - -### Custom Statistics - -While having mean, median and standard deviation is nice, this may not be -enough for everyone. For example you may want to know what the largest -observation is, e.g. because you have some real-time constraints. This is easy. -The following code will specify a custom statistic to be calculated, defined -by a lambda function. - -```c++ -void BM_spin_empty(benchmark::State& state) { - for (auto _ : state) { - for (int x = 0; x < state.range(0); ++x) { - benchmark::DoNotOptimize(x); - } - } -} - -BENCHMARK(BM_spin_empty) - ->ComputeStatistics("max", [](const std::vector& v) -> double { - return *(std::max_element(std::begin(v), std::end(v))); - }) - ->Arg(512); -``` - - - -### Using RegisterBenchmark(name, fn, args...) - -The `RegisterBenchmark(name, func, args...)` function provides an alternative -way to create and register benchmarks. -`RegisterBenchmark(name, func, args...)` creates, registers, and returns a -pointer to a new benchmark with the specified `name` that invokes -`func(st, args...)` where `st` is a `benchmark::State` object. - -Unlike the `BENCHMARK` registration macros, which can only be used at the global -scope, the `RegisterBenchmark` can be called anywhere. This allows for -benchmark tests to be registered programmatically. - -Additionally `RegisterBenchmark` allows any callable object to be registered -as a benchmark. Including capturing lambdas and function objects. - -For Example: -```c++ -auto BM_test = [](benchmark::State& st, auto Inputs) { /* ... */ }; - -int main(int argc, char** argv) { - for (auto& test_input : { /* ... 
*/ }) - benchmark::RegisterBenchmark(test_input.name(), BM_test, test_input); - benchmark::Initialize(&argc, argv); - benchmark::RunSpecifiedBenchmarks(); - benchmark::Shutdown(); -} -``` - - - -### Exiting with an Error - -When errors caused by external influences, such as file I/O and network -communication, occur within a benchmark the -`State::SkipWithError(const char* msg)` function can be used to skip that run -of benchmark and report the error. Note that only future iterations of the -`KeepRunning()` are skipped. For the ranged-for version of the benchmark loop -Users must explicitly exit the loop, otherwise all iterations will be performed. -Users may explicitly return to exit the benchmark immediately. - -The `SkipWithError(...)` function may be used at any point within the benchmark, -including before and after the benchmark loop. Moreover, if `SkipWithError(...)` -has been used, it is not required to reach the benchmark loop and one may return -from the benchmark function early. - -For example: - -```c++ -static void BM_test(benchmark::State& state) { - auto resource = GetResource(); - if (!resource.good()) { - state.SkipWithError("Resource is not good!"); - // KeepRunning() loop will not be entered. - } - while (state.KeepRunning()) { - auto data = resource.read_data(); - if (!resource.good()) { - state.SkipWithError("Failed to read data!"); - break; // Needed to skip the rest of the iteration. - } - do_stuff(data); - } -} - -static void BM_test_ranged_fo(benchmark::State & state) { - auto resource = GetResource(); - if (!resource.good()) { - state.SkipWithError("Resource is not good!"); - return; // Early return is allowed when SkipWithError() has been used. - } - for (auto _ : state) { - auto data = resource.read_data(); - if (!resource.good()) { - state.SkipWithError("Failed to read data!"); - break; // REQUIRED to prevent all further iterations. - } - do_stuff(data); - } -} -``` - - -### A Faster KeepRunning Loop - -In C++11 mode, a ranged-based for loop should be used in preference to -the `KeepRunning` loop for running the benchmarks. For example: - -```c++ -static void BM_Fast(benchmark::State &state) { - for (auto _ : state) { - FastOperation(); - } -} -BENCHMARK(BM_Fast); -``` - -The reason the ranged-for loop is faster than using `KeepRunning`, is -because `KeepRunning` requires a memory load and store of the iteration count -ever iteration, whereas the ranged-for variant is able to keep the iteration count -in a register. - -For example, an empty inner loop of using the ranged-based for method looks like: - -```asm -# Loop Init - mov rbx, qword ptr [r14 + 104] - call benchmark::State::StartKeepRunning() - test rbx, rbx - je .LoopEnd -.LoopHeader: # =>This Inner Loop Header: Depth=1 - add rbx, -1 - jne .LoopHeader -.LoopEnd: -``` - -Compared to an empty `KeepRunning` loop, which looks like: - -```asm -.LoopHeader: # in Loop: Header=BB0_3 Depth=1 - cmp byte ptr [rbx], 1 - jne .LoopInit -.LoopBody: # =>This Inner Loop Header: Depth=1 - mov rax, qword ptr [rbx + 8] - lea rcx, [rax + 1] - mov qword ptr [rbx + 8], rcx - cmp rax, qword ptr [rbx + 104] - jb .LoopHeader - jmp .LoopEnd -.LoopInit: - mov rdi, rbx - call benchmark::State::StartKeepRunning() - jmp .LoopBody -.LoopEnd: -``` - -Unless C++03 compatibility is required, the ranged-for variant of writing -the benchmark loop should be preferred. 
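
For comparison, a sketch of the same benchmark written against the older `KeepRunning()` interface; `BM_FastLegacy` and the empty `FastOperation()` stub are invented for illustration, mirroring the ranged-for example above. Only the loop construct differs, which is where the extra per-iteration load and store comes from.

```c++
#include <benchmark/benchmark.h>

// Empty stand-in for the FastOperation() used in the ranged-for example.
static void FastOperation() {}

static void BM_FastLegacy(benchmark::State& state) {
  // C++03-compatible form: KeepRunning() reads and updates the iteration
  // count through the State object on every pass, whereas the ranged-for
  // loop above can keep the count in a register.
  while (state.KeepRunning()) {
    FastOperation();
  }
}
BENCHMARK(BM_FastLegacy);

BENCHMARK_MAIN();
```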
- - - -### Disabling CPU Frequency Scaling - -If you see this error: - -``` -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -``` - -you might want to disable the CPU frequency scaling while running the benchmark: - -```bash -sudo cpupower frequency-set --governor performance -./mybench -sudo cpupower frequency-set --governor powersave -``` diff --git a/third-party/benchmark/WORKSPACE b/third-party/benchmark/WORKSPACE index 631f3ba05de5..949eb98bc5d9 100644 --- a/third-party/benchmark/WORKSPACE +++ b/third-party/benchmark/WORKSPACE @@ -1,13 +1,7 @@ workspace(name = "com_github_google_benchmark") load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") - -http_archive( - name = "rules_cc", - strip_prefix = "rules_cc-a508235df92e71d537fcbae0c7c952ea6957a912", - urls = ["https://github.com/bazelbuild/rules_cc/archive/a508235df92e71d537fcbae0c7c952ea6957a912.zip"], - sha256 = "d7dc12c1d5bc1a87474de8e3d17b7731a4dcebcfb8aa3990fe8ac7734ef12f2f", -) +load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") http_archive( name = "com_google_absl", @@ -16,11 +10,10 @@ http_archive( urls = ["https://github.com/abseil/abseil-cpp/archive/20200225.2.tar.gz"], ) -http_archive( +git_repository( name = "com_google_googletest", - strip_prefix = "googletest-3f0cf6b62ad1eb50d8736538363d3580dd640c3e", - urls = ["https://github.com/google/googletest/archive/3f0cf6b62ad1eb50d8736538363d3580dd640c3e.zip"], - sha256 = "8f827dd550db8b4fdf73904690df0be9fccc161017c9038a724bc9a0617a1bc8", + remote = "https://github.com/google/googletest.git", + tag = "release-1.11.0", ) http_archive( diff --git a/third-party/benchmark/bindings/python/google_benchmark/benchmark.cc b/third-party/benchmark/bindings/python/google_benchmark/benchmark.cc index 1b01fe7f7f0f..02b6ed7ed590 100644 --- a/third-party/benchmark/bindings/python/google_benchmark/benchmark.cc +++ b/third-party/benchmark/bindings/python/google_benchmark/benchmark.cc @@ -165,12 +165,12 @@ PYBIND11_MODULE(_benchmark, m) { &State::SetComplexityN) .def_property("items_processed", &State::items_processed, &State::SetItemsProcessed) - .def("set_label", (void (State::*)(const char*)) & State::SetLabel) + .def("set_label", (void(State::*)(const char*)) & State::SetLabel) .def("range", &State::range, py::arg("pos") = 0) .def_property_readonly("iterations", &State::iterations) .def_readwrite("counters", &State::counters) - .def_readonly("thread_index", &State::thread_index) - .def_readonly("threads", &State::threads); + .def_property_readonly("thread_index", &State::thread_index) + .def_property_readonly("threads", &State::threads); m.def("Initialize", Initialize); m.def("RegisterBenchmark", RegisterBenchmark, diff --git a/third-party/benchmark/bindings/python/google_benchmark/example.py b/third-party/benchmark/bindings/python/google_benchmark/example.py index 9134e8cffeaf..487acc9f1e09 100644 --- a/third-party/benchmark/bindings/python/google_benchmark/example.py +++ b/third-party/benchmark/bindings/python/google_benchmark/example.py @@ -102,7 +102,7 @@ def with_options(state): @benchmark.register(name="sum_million_microseconds") @benchmark.option.unit(benchmark.kMicrosecond) -def with_options(state): +def with_options2(state): while state: sum(range(1_000_000)) diff --git a/third-party/benchmark/cmake/Config.cmake.in b/third-party/benchmark/cmake/Config.cmake.in index 6e9256eea8a2..2e15f0cf82dc 100644 --- a/third-party/benchmark/cmake/Config.cmake.in +++ 
b/third-party/benchmark/cmake/Config.cmake.in @@ -1 +1,7 @@ +@PACKAGE_INIT@ + +include (CMakeFindDependencyMacro) + +find_dependency (Threads) + include("${CMAKE_CURRENT_LIST_DIR}/@targets_export_name@.cmake") diff --git a/third-party/benchmark/cmake/GoogleTest.cmake b/third-party/benchmark/cmake/GoogleTest.cmake index dd611fc875f1..66cb91008b73 100644 --- a/third-party/benchmark/cmake/GoogleTest.cmake +++ b/third-party/benchmark/cmake/GoogleTest.cmake @@ -29,13 +29,20 @@ set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) include(${GOOGLETEST_PREFIX}/googletest-paths.cmake) +# googletest doesn't seem to want to stay build warning clean so let's not hurt ourselves. +add_compile_options(-w) + # Add googletest directly to our build. This defines # the gtest and gtest_main targets. add_subdirectory(${GOOGLETEST_SOURCE_DIR} ${GOOGLETEST_BINARY_DIR} EXCLUDE_FROM_ALL) -set_target_properties(gtest PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $) -set_target_properties(gtest_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $) -set_target_properties(gmock PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $) -set_target_properties(gmock_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $) +if(NOT DEFINED GTEST_COMPILE_COMMANDS) + set(GTEST_COMPILE_COMMANDS ON) +endif() + +set_target_properties(gtest PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $ EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS}) +set_target_properties(gtest_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $ EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS}) +set_target_properties(gmock PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $ EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS}) +set_target_properties(gmock_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $ EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS}) diff --git a/third-party/benchmark/cmake/GoogleTest.cmake.in b/third-party/benchmark/cmake/GoogleTest.cmake.in index fd957ff56409..ce653ac375ac 100644 --- a/third-party/benchmark/cmake/GoogleTest.cmake.in +++ b/third-party/benchmark/cmake/GoogleTest.cmake.in @@ -31,13 +31,14 @@ if(EXISTS "${GOOGLETEST_PATH}" AND IS_DIRECTORY "${GOOGLETEST_PATH}" ) else() if(NOT ALLOW_DOWNLOADING_GOOGLETEST) - message(SEND_ERROR "Did not find Google Test sources! Either pass correct path in GOOGLETEST_PATH, or enable BENCHMARK_DOWNLOAD_DEPENDENCIES, or disable BENCHMARK_ENABLE_GTEST_TESTS / BENCHMARK_ENABLE_TESTING.") + message(SEND_ERROR "Did not find Google Test sources! Either pass correct path in GOOGLETEST_PATH, or enable BENCHMARK_DOWNLOAD_DEPENDENCIES, or disable BENCHMARK_USE_BUNDLED_GTEST, or disable BENCHMARK_ENABLE_GTEST_TESTS / BENCHMARK_ENABLE_TESTING.") + return() else() message(WARNING "Did not find Google Test sources! 
Fetching from web...") ExternalProject_Add( googletest GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG master + GIT_TAG "release-1.11.0" PREFIX "${CMAKE_BINARY_DIR}" STAMP_DIR "${CMAKE_BINARY_DIR}/stamp" DOWNLOAD_DIR "${CMAKE_BINARY_DIR}/download" diff --git a/third-party/benchmark/cmake/Modules/FindLLVMAr.cmake b/third-party/benchmark/cmake/Modules/FindLLVMAr.cmake new file mode 100644 index 000000000000..23469813cfab --- /dev/null +++ b/third-party/benchmark/cmake/Modules/FindLLVMAr.cmake @@ -0,0 +1,16 @@ +include(FeatureSummary) + +find_program(LLVMAR_EXECUTABLE + NAMES llvm-ar + DOC "The llvm-ar executable" + ) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(LLVMAr + DEFAULT_MSG + LLVMAR_EXECUTABLE) + +SET_PACKAGE_PROPERTIES(LLVMAr PROPERTIES + URL https://llvm.org/docs/CommandGuide/llvm-ar.html + DESCRIPTION "create, modify, and extract from archives" +) diff --git a/third-party/benchmark/cmake/Modules/FindLLVMNm.cmake b/third-party/benchmark/cmake/Modules/FindLLVMNm.cmake new file mode 100644 index 000000000000..e56430a04f6e --- /dev/null +++ b/third-party/benchmark/cmake/Modules/FindLLVMNm.cmake @@ -0,0 +1,16 @@ +include(FeatureSummary) + +find_program(LLVMNM_EXECUTABLE + NAMES llvm-nm + DOC "The llvm-nm executable" + ) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(LLVMNm + DEFAULT_MSG + LLVMNM_EXECUTABLE) + +SET_PACKAGE_PROPERTIES(LLVMNm PROPERTIES + URL https://llvm.org/docs/CommandGuide/llvm-nm.html + DESCRIPTION "list LLVM bitcode and object file’s symbol table" +) diff --git a/third-party/benchmark/cmake/Modules/FindLLVMRanLib.cmake b/third-party/benchmark/cmake/Modules/FindLLVMRanLib.cmake new file mode 100644 index 000000000000..7b53e1a79059 --- /dev/null +++ b/third-party/benchmark/cmake/Modules/FindLLVMRanLib.cmake @@ -0,0 +1,15 @@ +include(FeatureSummary) + +find_program(LLVMRANLIB_EXECUTABLE + NAMES llvm-ranlib + DOC "The llvm-ranlib executable" + ) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(LLVMRanLib + DEFAULT_MSG + LLVMRANLIB_EXECUTABLE) + +SET_PACKAGE_PROPERTIES(LLVMRanLib PROPERTIES + DESCRIPTION "generate index for LLVM archive" +) diff --git a/third-party/benchmark/cmake/Modules/FindPFM.cmake b/third-party/benchmark/cmake/Modules/FindPFM.cmake new file mode 100644 index 000000000000..cf807a1ee9e9 --- /dev/null +++ b/third-party/benchmark/cmake/Modules/FindPFM.cmake @@ -0,0 +1,26 @@ +# If successful, the following variables will be defined: +# HAVE_LIBPFM. +# Set BENCHMARK_ENABLE_LIBPFM to 0 to disable, regardless of libpfm presence. 
+include(CheckIncludeFile) +include(CheckLibraryExists) +include(FeatureSummary) +enable_language(C) + +set_package_properties(PFM PROPERTIES + URL http://perfmon2.sourceforge.net/ + DESCRIPTION "a helper library to develop monitoring tools" + PURPOSE "Used to program specific performance monitoring events") + +check_library_exists(libpfm.a pfm_initialize "" HAVE_LIBPFM_INITIALIZE) +if(HAVE_LIBPFM_INITIALIZE) + check_include_file(perfmon/perf_event.h HAVE_PERFMON_PERF_EVENT_H) + check_include_file(perfmon/pfmlib.h HAVE_PERFMON_PFMLIB_H) + check_include_file(perfmon/pfmlib_perf_event.h HAVE_PERFMON_PFMLIB_PERF_EVENT_H) + if(HAVE_PERFMON_PERF_EVENT_H AND HAVE_PERFMON_PFMLIB_H AND HAVE_PERFMON_PFMLIB_PERF_EVENT_H) + message("Using Perf Counters.") + set(HAVE_LIBPFM 1) + set(PFM_FOUND 1) + endif() +else() + message("Perf Counters support requested, but was unable to find libpfm.") +endif() diff --git a/third-party/benchmark/docs/_config.yml b/third-party/benchmark/docs/_config.yml index fc24e7a62dc2..2f7efbeab578 100644 --- a/third-party/benchmark/docs/_config.yml +++ b/third-party/benchmark/docs/_config.yml @@ -1 +1 @@ -theme: jekyll-theme-hacker \ No newline at end of file +theme: jekyll-theme-minimal \ No newline at end of file diff --git a/third-party/benchmark/dependencies.md b/third-party/benchmark/docs/dependencies.md similarity index 54% rename from third-party/benchmark/dependencies.md rename to third-party/benchmark/docs/dependencies.md index 6289b4e3548b..7af52b95bd86 100644 --- a/third-party/benchmark/dependencies.md +++ b/third-party/benchmark/docs/dependencies.md @@ -3,16 +3,17 @@ To ensure the broadest compatibility when building the benchmark library, but still allow forward progress, we require any build tooling to be available for: -* Debian stable AND -* The last two Ubuntu LTS releases AND +* Debian stable _and_ +* The last two Ubuntu LTS releases Currently, this means using build tool versions that are available for Ubuntu -16.04 (Xenial), Ubuntu 18.04 (Bionic), and Debian stretch. +18.04 (Bionic Beaver), Ubuntu 20.04 (Focal Fossa), and Debian 11 (bullseye). -_Note, [travis](.travis.yml) runs under Ubuntu 14.04 (Trusty) for linux builds._ +_Note, CI also runs ubuntu-16.04 and ubuntu-14.04 to ensure best effort support +for older versions._ ## cmake The current supported version is cmake 3.5.1 as of 2018-06-06. 
-_Note, this version is also available for Ubuntu 14.04, the previous Ubuntu LTS +_Note, this version is also available for Ubuntu 14.04, an older Ubuntu LTS release, as `cmake3`._ diff --git a/third-party/benchmark/docs/index.md b/third-party/benchmark/docs/index.md new file mode 100644 index 000000000000..eb82eff9eee0 --- /dev/null +++ b/third-party/benchmark/docs/index.md @@ -0,0 +1,10 @@ +# Benchmark + +* [Assembly Tests](AssemblyTests.md) +* [Dependencies](dependencies.md) +* [Perf Counters](perf_counters.md) +* [Platform Specific Build Instructions](platform_specific_build_instructions.md) +* [Random Interleaving](random_interleaving.md) +* [Releasing](releasing.md) +* [Tools](tools.md) +* [User Guide](user_guide.md) \ No newline at end of file diff --git a/third-party/benchmark/docs/platform_specific_build_instructions.md b/third-party/benchmark/docs/platform_specific_build_instructions.md new file mode 100644 index 000000000000..2d5d6c47eead --- /dev/null +++ b/third-party/benchmark/docs/platform_specific_build_instructions.md @@ -0,0 +1,48 @@ +# Platform Specific Build Instructions + +## Building with GCC + +When the library is built using GCC it is necessary to link with the pthread +library due to how GCC implements `std::thread`. Failing to link to pthread will +lead to runtime exceptions (unless you're using libc++), not linker errors. See +[issue #67](https://github.com/google/benchmark/issues/67) for more details. You +can link to pthread by adding `-pthread` to your linker command. Note, you can +also use `-lpthread`, but there are potential issues with ordering of command +line parameters if you use that. + +On QNX, the pthread library is part of libc and usually included automatically +(see +[`pthread_create()`](https://www.qnx.com/developers/docs/7.1/index.html#com.qnx.doc.neutrino.lib_ref/topic/p/pthread_create.html)). +There's no separate pthread library to link. + +## Building with Visual Studio 2015 or 2017 + +The `shlwapi` library (`-lshlwapi`) is required to support a call to `CPUInfo` which reads the registry. Either add `shlwapi.lib` under `[ Configuration Properties > Linker > Input ]`, or use the following: + +``` +// Alternatively, can add libraries using linker options. +#ifdef _WIN32 +#pragma comment ( lib, "Shlwapi.lib" ) +#ifdef _DEBUG +#pragma comment ( lib, "benchmarkd.lib" ) +#else +#pragma comment ( lib, "benchmark.lib" ) +#endif +#endif +``` + +Can also use the graphical version of CMake: +* Open `CMake GUI`. +* Under `Where to build the binaries`, same path as source plus `build`. +* Under `CMAKE_INSTALL_PREFIX`, same path as source plus `install`. +* Click `Configure`, `Generate`, `Open Project`. +* If build fails, try deleting entire directory and starting again, or unticking options to build less. + +## Building with Intel 2015 Update 1 or Intel System Studio Update 4 + +See instructions for building with Visual Studio. Once built, right click on the solution and change the build to Intel. + +## Building on Solaris + +If you're running benchmarks on solaris, you'll want the kstat library linked in +too (`-lkstat`). \ No newline at end of file diff --git a/third-party/benchmark/docs/releasing.md b/third-party/benchmark/docs/releasing.md index 7a6dfc4017b2..334f93539381 100644 --- a/third-party/benchmark/docs/releasing.md +++ b/third-party/benchmark/docs/releasing.md @@ -8,10 +8,23 @@ * `git log $(git describe --abbrev=0 --tags)..HEAD` gives you the list of commits between the last annotated tag and HEAD * Pick the most interesting. 
-* Create one last commit that updates the version saved in `CMakeLists.txt` to the release version you're creating. (This version will be used if benchmark is installed from the archive you'll be creating in the next step.) +* Create one last commit that updates the version saved in `CMakeLists.txt` and the + `__version__` variable in `bindings/python/google_benchmark/__init__.py`to the release + version you're creating. (This version will be used if benchmark is installed from the + archive you'll be creating in the next step.) ``` -project (benchmark VERSION 1.5.3 LANGUAGES CXX) +project (benchmark VERSION 1.6.0 LANGUAGES CXX) +``` + +```python +# bindings/python/google_benchmark/__init__.py + +# ... + +__version__ = "1.6.0" # <-- change this to the release version you are creating + +# ... ``` * Create a release through github's interface @@ -19,4 +32,4 @@ project (benchmark VERSION 1.5.3 LANGUAGES CXX) * Update this to an annotated tag: * `git pull --tags` * `git tag -a -f ` - * `git push --force origin` + * `git push --force --tags origin` diff --git a/third-party/benchmark/docs/user_guide.md b/third-party/benchmark/docs/user_guide.md new file mode 100644 index 000000000000..34bea6904240 --- /dev/null +++ b/third-party/benchmark/docs/user_guide.md @@ -0,0 +1,1200 @@ +# User Guide + +## Command Line + +[Output Formats](#output-formats) + +[Output Files](#output-files) + +[Running Benchmarks](#running-benchmarks) + +[Running a Subset of Benchmarks](#running-a-subset-of-benchmarks) + +[Result Comparison](#result-comparison) + +[Extra Context](#extra-context) + +## Library + +[Runtime and Reporting Considerations](#runtime-and-reporting-considerations) + +[Setup/Teardown](#setupteardown) + +[Passing Arguments](#passing-arguments) + +[Custom Benchmark Name](#custom-benchmark-name) + +[Calculating Asymptotic Complexity](#asymptotic-complexity) + +[Templated Benchmarks](#templated-benchmarks) + +[Fixtures](#fixtures) + +[Custom Counters](#custom-counters) + +[Multithreaded Benchmarks](#multithreaded-benchmarks) + +[CPU Timers](#cpu-timers) + +[Manual Timing](#manual-timing) + +[Setting the Time Unit](#setting-the-time-unit) + +[Random Interleaving](random_interleaving.md) + +[User-Requested Performance Counters](perf_counters.md) + +[Preventing Optimization](#preventing-optimization) + +[Reporting Statistics](#reporting-statistics) + +[Custom Statistics](#custom-statistics) + +[Using RegisterBenchmark](#using-register-benchmark) + +[Exiting with an Error](#exiting-with-an-error) + +[A Faster KeepRunning Loop](#a-faster-keep-running-loop) + +[Disabling CPU Frequency Scaling](#disabling-cpu-frequency-scaling) + + + + +## Output Formats + +The library supports multiple output formats. Use the +`--benchmark_format=` flag (or set the +`BENCHMARK_FORMAT=` environment variable) to set +the format type. `console` is the default format. + +The Console format is intended to be a human readable format. By default +the format generates color output. Context is output on stderr and the +tabular data on stdout. Example tabular output looks like: + +``` +Benchmark Time(ns) CPU(ns) Iterations +---------------------------------------------------------------------- +BM_SetInsert/1024/1 28928 29349 23853 133.097kB/s 33.2742k items/s +BM_SetInsert/1024/8 32065 32913 21375 949.487kB/s 237.372k items/s +BM_SetInsert/1024/10 33157 33648 21431 1.13369MB/s 290.225k items/s +``` + +The JSON format outputs human readable json split into two top level attributes. 
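+(As an aside, the output format can also be chosen programmatically rather than
+via the flag. The sketch below, which would replace the usual `BENCHMARK_MAIN()`,
+is an illustration only and is not taken from the library's own documentation.)
+
+```c++
+#include <benchmark/benchmark.h>
+
+int main(int argc, char** argv) {
+  benchmark::Initialize(&argc, argv);
+  if (benchmark::ReportUnrecognizedArguments(argc, argv)) return 1;
+  benchmark::JSONReporter json_reporter;  // or ConsoleReporter / CSVReporter
+  benchmark::RunSpecifiedBenchmarks(&json_reporter);
+  benchmark::Shutdown();
+  return 0;
+}
+```
+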
+The `context` attribute contains information about the run in general, including +information about the CPU and the date. +The `benchmarks` attribute contains a list of every benchmark run. Example json +output looks like: + +```json +{ + "context": { + "date": "2015/03/17-18:40:25", + "num_cpus": 40, + "mhz_per_cpu": 2801, + "cpu_scaling_enabled": false, + "build_type": "debug" + }, + "benchmarks": [ + { + "name": "BM_SetInsert/1024/1", + "iterations": 94877, + "real_time": 29275, + "cpu_time": 29836, + "bytes_per_second": 134066, + "items_per_second": 33516 + }, + { + "name": "BM_SetInsert/1024/8", + "iterations": 21609, + "real_time": 32317, + "cpu_time": 32429, + "bytes_per_second": 986770, + "items_per_second": 246693 + }, + { + "name": "BM_SetInsert/1024/10", + "iterations": 21393, + "real_time": 32724, + "cpu_time": 33355, + "bytes_per_second": 1199226, + "items_per_second": 299807 + } + ] +} +``` + +The CSV format outputs comma-separated values. The `context` is output on stderr +and the CSV itself on stdout. Example CSV output looks like: + +``` +name,iterations,real_time,cpu_time,bytes_per_second,items_per_second,label +"BM_SetInsert/1024/1",65465,17890.7,8407.45,475768,118942, +"BM_SetInsert/1024/8",116606,18810.1,9766.64,3.27646e+06,819115, +"BM_SetInsert/1024/10",106365,17238.4,8421.53,4.74973e+06,1.18743e+06, +``` + + + +## Output Files + +Write benchmark results to a file with the `--benchmark_out=` option +(or set `BENCHMARK_OUT`). Specify the output format with +`--benchmark_out_format={json|console|csv}` (or set +`BENCHMARK_OUT_FORMAT={json|console|csv}`). Note that the 'csv' reporter is +deprecated and the saved `.csv` file +[is not parsable](https://github.com/google/benchmark/issues/794) by csv +parsers. + +Specifying `--benchmark_out` does not suppress the console output. + + + +## Running Benchmarks + +Benchmarks are executed by running the produced binaries. Benchmarks binaries, +by default, accept options that may be specified either through their command +line interface or by setting environment variables before execution. For every +`--option_flag=` CLI switch, a corresponding environment variable +`OPTION_FLAG=` exist and is used as default if set (CLI switches always + prevails). A complete list of CLI options is available running benchmarks + with the `--help` switch. + + + +## Running a Subset of Benchmarks + +The `--benchmark_filter=` option (or `BENCHMARK_FILTER=` +environment variable) can be used to only run the benchmarks that match +the specified ``. For example: + +```bash +$ ./run_benchmarks.x --benchmark_filter=BM_memcpy/32 +Run on (1 X 2300 MHz CPU ) +2016-06-25 19:34:24 +Benchmark Time CPU Iterations +---------------------------------------------------- +BM_memcpy/32 11 ns 11 ns 79545455 +BM_memcpy/32k 2181 ns 2185 ns 324074 +BM_memcpy/32 12 ns 12 ns 54687500 +BM_memcpy/32k 1834 ns 1837 ns 357143 +``` + + + +## Result comparison + +It is possible to compare the benchmarking results. +See [Additional Tooling Documentation](tools.md) + + + +## Extra Context + +Sometimes it's useful to add extra context to the content printed before the +results. By default this section includes information about the CPU on which +the benchmarks are running. 
If you do want to add more context, you can use +the `benchmark_context` command line flag: + +```bash +$ ./run_benchmarks --benchmark_context=pwd=`pwd` +Run on (1 x 2300 MHz CPU) +pwd: /home/user/benchmark/ +Benchmark Time CPU Iterations +---------------------------------------------------- +BM_memcpy/32 11 ns 11 ns 79545455 +BM_memcpy/32k 2181 ns 2185 ns 324074 +``` + +You can get the same effect with the API: + +```c++ + benchmark::AddCustomContext("foo", "bar"); +``` + +Note that attempts to add a second value with the same key will fail with an +error message. + + + +## Runtime and Reporting Considerations + +When the benchmark binary is executed, each benchmark function is run serially. +The number of iterations to run is determined dynamically by running the +benchmark a few times and measuring the time taken and ensuring that the +ultimate result will be statistically stable. As such, faster benchmark +functions will be run for more iterations than slower benchmark functions, and +the number of iterations is thus reported. + +In all cases, the number of iterations for which the benchmark is run is +governed by the amount of time the benchmark takes. Concretely, the number of +iterations is at least one, not more than 1e9, until CPU time is greater than +the minimum time, or the wallclock time is 5x minimum time. The minimum time is +set per benchmark by calling `MinTime` on the registered benchmark object. + +Average timings are then reported over the iterations run. If multiple +repetitions are requested using the `--benchmark_repetitions` command-line +option, or at registration time, the benchmark function will be run several +times and statistical results across these repetitions will also be reported. + +As well as the per-benchmark entries, a preamble in the report will include +information about the machine on which the benchmarks are run. + + + +## Setup/Teardown + +Global setup/teardown specific to each benchmark can be done by +passing a callback to Setup/Teardown: + +The setup/teardown callbacks will be invoked once for each benchmark. +If the benchmark is multi-threaded (will run in k threads), they will be invoked exactly once before +each run with k threads. +If the benchmark uses different size groups of threads, the above will be true for each size group. + +Eg., + +```c++ +static void DoSetup(const benchmark::State& state) { +} + +static void DoTeardown(const benchmark::State& state) { +} + +static void BM_func(benchmark::State& state) {...} + +BENCHMARK(BM_func)->Arg(1)->Arg(3)->Threads(16)->Threads(32)->Setup(DoSetup)->Teardown(DoTeardown); + +``` + +In this example, `DoSetup` and `DoTearDown` will be invoked 4 times each, +specifically, once for each of this family: + - BM_func_Arg_1_Threads_16, BM_func_Arg_1_Threads_32 + - BM_func_Arg_3_Threads_16, BM_func_Arg_3_Threads_32 + + + +## Passing Arguments + +Sometimes a family of benchmarks can be implemented with just one routine that +takes an extra argument to specify which one of the family of benchmarks to +run. 
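+As a minimal sketch (the routine `BM_Example` below is hypothetical and not part
+of the library or of the examples that follow), such a routine reads its argument
+through `state.range(0)`:
+
+```c++
+#include <benchmark/benchmark.h>
+#include <cstddef>
+#include <string>
+
+// Hypothetical benchmark: the argument selects the size of the string in use.
+static void BM_Example(benchmark::State& state) {
+  std::string s(static_cast<std::size_t>(state.range(0)), 'x');
+  for (auto _ : state) {
+    // Prevent the compiler from optimising the unused data away.
+    benchmark::DoNotOptimize(s.data());
+  }
+}
+// Run the same routine for two different argument values.
+BENCHMARK(BM_Example)->Arg(64)->Arg(4096);
+```
+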
For example, the following code defines a family of benchmarks for +measuring the speed of `memcpy()` calls of different lengths: + +```c++ +static void BM_memcpy(benchmark::State& state) { + char* src = new char[state.range(0)]; + char* dst = new char[state.range(0)]; + memset(src, 'x', state.range(0)); + for (auto _ : state) + memcpy(dst, src, state.range(0)); + state.SetBytesProcessed(int64_t(state.iterations()) * + int64_t(state.range(0))); + delete[] src; + delete[] dst; +} +BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10); +``` + +The preceding code is quite repetitive, and can be replaced with the following +short-hand. The following invocation will pick a few appropriate arguments in +the specified range and will generate a benchmark for each such argument. + +```c++ +BENCHMARK(BM_memcpy)->Range(8, 8<<10); +``` + +By default the arguments in the range are generated in multiples of eight and +the command above selects [ 8, 64, 512, 4k, 8k ]. In the following code the +range multiplier is changed to multiples of two. + +```c++ +BENCHMARK(BM_memcpy)->RangeMultiplier(2)->Range(8, 8<<10); +``` + +Now arguments generated are [ 8, 16, 32, 64, 128, 256, 512, 1024, 2k, 4k, 8k ]. + +The preceding code shows a method of defining a sparse range. The following +example shows a method of defining a dense range. It is then used to benchmark +the performance of `std::vector` initialization for uniformly increasing sizes. + +```c++ +static void BM_DenseRange(benchmark::State& state) { + for(auto _ : state) { + std::vector v(state.range(0), state.range(0)); + benchmark::DoNotOptimize(v.data()); + benchmark::ClobberMemory(); + } +} +BENCHMARK(BM_DenseRange)->DenseRange(0, 1024, 128); +``` + +Now arguments generated are [ 0, 128, 256, 384, 512, 640, 768, 896, 1024 ]. + +You might have a benchmark that depends on two or more inputs. For example, the +following code defines a family of benchmarks for measuring the speed of set +insertion. + +```c++ +static void BM_SetInsert(benchmark::State& state) { + std::set data; + for (auto _ : state) { + state.PauseTiming(); + data = ConstructRandomSet(state.range(0)); + state.ResumeTiming(); + for (int j = 0; j < state.range(1); ++j) + data.insert(RandomNumber()); + } +} +BENCHMARK(BM_SetInsert) + ->Args({1<<10, 128}) + ->Args({2<<10, 128}) + ->Args({4<<10, 128}) + ->Args({8<<10, 128}) + ->Args({1<<10, 512}) + ->Args({2<<10, 512}) + ->Args({4<<10, 512}) + ->Args({8<<10, 512}); +``` + +The preceding code is quite repetitive, and can be replaced with the following +short-hand. The following macro will pick a few appropriate arguments in the +product of the two specified ranges and will generate a benchmark for each such +pair. + +{% raw %} +```c++ +BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}}); +``` +{% endraw %} + +Some benchmarks may require specific argument values that cannot be expressed +with `Ranges`. In this case, `ArgsProduct` offers the ability to generate a +benchmark input for each combination in the product of the supplied vectors. 
+ +{% raw %} +```c++ +BENCHMARK(BM_SetInsert) + ->ArgsProduct({{1<<10, 3<<10, 8<<10}, {20, 40, 60, 80}}) +// would generate the same benchmark arguments as +BENCHMARK(BM_SetInsert) + ->Args({1<<10, 20}) + ->Args({3<<10, 20}) + ->Args({8<<10, 20}) + ->Args({3<<10, 40}) + ->Args({8<<10, 40}) + ->Args({1<<10, 40}) + ->Args({1<<10, 60}) + ->Args({3<<10, 60}) + ->Args({8<<10, 60}) + ->Args({1<<10, 80}) + ->Args({3<<10, 80}) + ->Args({8<<10, 80}); +``` +{% endraw %} + +For the most common scenarios, helper methods for creating a list of +integers for a given sparse or dense range are provided. + +```c++ +BENCHMARK(BM_SetInsert) + ->ArgsProduct({ + benchmark::CreateRange(8, 128, /*multi=*/2), + benchmark::CreateDenseRange(1, 4, /*step=*/1) + }) +// would generate the same benchmark arguments as +BENCHMARK(BM_SetInsert) + ->ArgsProduct({ + {8, 16, 32, 64, 128}, + {1, 2, 3, 4} + }); +``` + +For more complex patterns of inputs, passing a custom function to `Apply` allows +programmatic specification of an arbitrary set of arguments on which to run the +benchmark. The following example enumerates a dense range on one parameter, +and a sparse range on the second. + +```c++ +static void CustomArguments(benchmark::internal::Benchmark* b) { + for (int i = 0; i <= 10; ++i) + for (int j = 32; j <= 1024*1024; j *= 8) + b->Args({i, j}); +} +BENCHMARK(BM_SetInsert)->Apply(CustomArguments); +``` + +### Passing Arbitrary Arguments to a Benchmark + +In C++11 it is possible to define a benchmark that takes an arbitrary number +of extra arguments. The `BENCHMARK_CAPTURE(func, test_case_name, ...args)` +macro creates a benchmark that invokes `func` with the `benchmark::State` as +the first argument followed by the specified `args...`. +The `test_case_name` is appended to the name of the benchmark and +should describe the values passed. + +```c++ +template +void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) { + [...] +} +// Registers a benchmark named "BM_takes_args/int_string_test" that passes +// the specified values to `extra_args`. +BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc")); +``` + +Note that elements of `...args` may refer to global variables. Users should +avoid modifying global state inside of a benchmark. + + + +## Calculating Asymptotic Complexity (Big O) + +Asymptotic complexity might be calculated for a family of benchmarks. The +following code will calculate the coefficient for the high-order term in the +running time and the normalized root-mean square error of string comparison. + +```c++ +static void BM_StringCompare(benchmark::State& state) { + std::string s1(state.range(0), '-'); + std::string s2(state.range(0), '-'); + for (auto _ : state) { + benchmark::DoNotOptimize(s1.compare(s2)); + } + state.SetComplexityN(state.range(0)); +} +BENCHMARK(BM_StringCompare) + ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(benchmark::oN); +``` + +As shown in the following invocation, asymptotic complexity might also be +calculated automatically. + +```c++ +BENCHMARK(BM_StringCompare) + ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(); +``` + +The following code will specify asymptotic complexity with a lambda function, +that might be used to customize high-order term calculation. 
+ +```c++ +BENCHMARK(BM_StringCompare)->RangeMultiplier(2) + ->Range(1<<10, 1<<18)->Complexity([](benchmark::IterationCount n)->double{return n; }); +``` + + + +## Custom Benchmark Name + +You can change the benchmark's name as follows: + +```c++ +BENCHMARK(BM_memcpy)->Name("memcpy")->RangeMultiplier(2)->Range(8, 8<<10); +``` + +The invocation will execute the benchmark as before using `BM_memcpy` but changes +the prefix in the report to `memcpy`. + + + +## Templated Benchmarks + +This example produces and consumes messages of size `sizeof(v)` `range_x` +times. It also outputs throughput in the absence of multiprogramming. + +```c++ +template void BM_Sequential(benchmark::State& state) { + Q q; + typename Q::value_type v; + for (auto _ : state) { + for (int i = state.range(0); i--; ) + q.push(v); + for (int e = state.range(0); e--; ) + q.Wait(&v); + } + // actually messages, not bytes: + state.SetBytesProcessed( + static_cast(state.iterations())*state.range(0)); +} +// C++03 +BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue)->Range(1<<0, 1<<10); + +// C++11 or newer, you can use the BENCHMARK macro with template parameters: +BENCHMARK(BM_Sequential>)->Range(1<<0, 1<<10); + +``` + +Three macros are provided for adding benchmark templates. + +```c++ +#ifdef BENCHMARK_HAS_CXX11 +#define BENCHMARK(func<...>) // Takes any number of parameters. +#else // C++ < C++11 +#define BENCHMARK_TEMPLATE(func, arg1) +#endif +#define BENCHMARK_TEMPLATE1(func, arg1) +#define BENCHMARK_TEMPLATE2(func, arg1, arg2) +``` + + + +## Fixtures + +Fixture tests are created by first defining a type that derives from +`::benchmark::Fixture` and then creating/registering the tests using the +following macros: + +* `BENCHMARK_F(ClassName, Method)` +* `BENCHMARK_DEFINE_F(ClassName, Method)` +* `BENCHMARK_REGISTER_F(ClassName, Method)` + +For Example: + +```c++ +class MyFixture : public benchmark::Fixture { +public: + void SetUp(const ::benchmark::State& state) { + } + + void TearDown(const ::benchmark::State& state) { + } +}; + +BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) { + for (auto _ : st) { + ... + } +} + +BENCHMARK_DEFINE_F(MyFixture, BarTest)(benchmark::State& st) { + for (auto _ : st) { + ... + } +} +/* BarTest is NOT registered */ +BENCHMARK_REGISTER_F(MyFixture, BarTest)->Threads(2); +/* BarTest is now registered */ +``` + +### Templated Fixtures + +Also you can create templated fixture by using the following macros: + +* `BENCHMARK_TEMPLATE_F(ClassName, Method, ...)` +* `BENCHMARK_TEMPLATE_DEFINE_F(ClassName, Method, ...)` + +For example: + +```c++ +template +class MyFixture : public benchmark::Fixture {}; + +BENCHMARK_TEMPLATE_F(MyFixture, IntTest, int)(benchmark::State& st) { + for (auto _ : st) { + ... + } +} + +BENCHMARK_TEMPLATE_DEFINE_F(MyFixture, DoubleTest, double)(benchmark::State& st) { + for (auto _ : st) { + ... + } +} + +BENCHMARK_REGISTER_F(MyFixture, DoubleTest)->Threads(2); +``` + + + +## Custom Counters + +You can add your own counters with user-defined names. The example below +will add columns "Foo", "Bar" and "Baz" in its output: + +```c++ +static void UserCountersExample1(benchmark::State& state) { + double numFoos = 0, numBars = 0, numBazs = 0; + for (auto _ : state) { + // ... count Foo,Bar,Baz events + } + state.counters["Foo"] = numFoos; + state.counters["Bar"] = numBars; + state.counters["Baz"] = numBazs; +} +``` + +The `state.counters` object is a `std::map` with `std::string` keys +and `Counter` values. 
The latter is a `double`-like class, via an implicit +conversion to `double&`. Thus you can use all of the standard arithmetic +assignment operators (`=,+=,-=,*=,/=`) to change the value of each counter. + +In multithreaded benchmarks, each counter is set on the calling thread only. +When the benchmark finishes, the counters from each thread will be summed; +the resulting sum is the value which will be shown for the benchmark. + +The `Counter` constructor accepts three parameters: the value as a `double` +; a bit flag which allows you to show counters as rates, and/or as per-thread +iteration, and/or as per-thread averages, and/or iteration invariants, +and/or finally inverting the result; and a flag specifying the 'unit' - i.e. +is 1k a 1000 (default, `benchmark::Counter::OneK::kIs1000`), or 1024 +(`benchmark::Counter::OneK::kIs1024`)? + +```c++ + // sets a simple counter + state.counters["Foo"] = numFoos; + + // Set the counter as a rate. It will be presented divided + // by the duration of the benchmark. + // Meaning: per one second, how many 'foo's are processed? + state.counters["FooRate"] = Counter(numFoos, benchmark::Counter::kIsRate); + + // Set the counter as a rate. It will be presented divided + // by the duration of the benchmark, and the result inverted. + // Meaning: how many seconds it takes to process one 'foo'? + state.counters["FooInvRate"] = Counter(numFoos, benchmark::Counter::kIsRate | benchmark::Counter::kInvert); + + // Set the counter as a thread-average quantity. It will + // be presented divided by the number of threads. + state.counters["FooAvg"] = Counter(numFoos, benchmark::Counter::kAvgThreads); + + // There's also a combined flag: + state.counters["FooAvgRate"] = Counter(numFoos,benchmark::Counter::kAvgThreadsRate); + + // This says that we process with the rate of state.range(0) bytes every iteration: + state.counters["BytesProcessed"] = Counter(state.range(0), benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::OneK::kIs1024); +``` + +When you're compiling in C++11 mode or later you can use `insert()` with +`std::initializer_list`: + +{% raw %} +```c++ + // With C++11, this can be done: + state.counters.insert({{"Foo", numFoos}, {"Bar", numBars}, {"Baz", numBazs}}); + // ... instead of: + state.counters["Foo"] = numFoos; + state.counters["Bar"] = numBars; + state.counters["Baz"] = numBazs; +``` +{% endraw %} + +### Counter Reporting + +When using the console reporter, by default, user counters are printed at +the end after the table, the same way as ``bytes_processed`` and +``items_processed``. This is best for cases in which there are few counters, +or where there are only a couple of lines per benchmark. Here's an example of +the default output: + +``` +------------------------------------------------------------------------------ +Benchmark Time CPU Iterations UserCounters... 
+------------------------------------------------------------------------------ +BM_UserCounter/threads:8 2248 ns 10277 ns 68808 Bar=16 Bat=40 Baz=24 Foo=8 +BM_UserCounter/threads:1 9797 ns 9788 ns 71523 Bar=2 Bat=5 Baz=3 Foo=1024m +BM_UserCounter/threads:2 4924 ns 9842 ns 71036 Bar=4 Bat=10 Baz=6 Foo=2 +BM_UserCounter/threads:4 2589 ns 10284 ns 68012 Bar=8 Bat=20 Baz=12 Foo=4 +BM_UserCounter/threads:8 2212 ns 10287 ns 68040 Bar=16 Bat=40 Baz=24 Foo=8 +BM_UserCounter/threads:16 1782 ns 10278 ns 68144 Bar=32 Bat=80 Baz=48 Foo=16 +BM_UserCounter/threads:32 1291 ns 10296 ns 68256 Bar=64 Bat=160 Baz=96 Foo=32 +BM_UserCounter/threads:4 2615 ns 10307 ns 68040 Bar=8 Bat=20 Baz=12 Foo=4 +BM_Factorial 26 ns 26 ns 26608979 40320 +BM_Factorial/real_time 26 ns 26 ns 26587936 40320 +BM_CalculatePiRange/1 16 ns 16 ns 45704255 0 +BM_CalculatePiRange/8 73 ns 73 ns 9520927 3.28374 +BM_CalculatePiRange/64 609 ns 609 ns 1140647 3.15746 +BM_CalculatePiRange/512 4900 ns 4901 ns 142696 3.14355 +``` + +If this doesn't suit you, you can print each counter as a table column by +passing the flag `--benchmark_counters_tabular=true` to the benchmark +application. This is best for cases in which there are a lot of counters, or +a lot of lines per individual benchmark. Note that this will trigger a +reprinting of the table header any time the counter set changes between +individual benchmarks. Here's an example of corresponding output when +`--benchmark_counters_tabular=true` is passed: + +``` +--------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations Bar Bat Baz Foo +--------------------------------------------------------------------------------------- +BM_UserCounter/threads:8 2198 ns 9953 ns 70688 16 40 24 8 +BM_UserCounter/threads:1 9504 ns 9504 ns 73787 2 5 3 1 +BM_UserCounter/threads:2 4775 ns 9550 ns 72606 4 10 6 2 +BM_UserCounter/threads:4 2508 ns 9951 ns 70332 8 20 12 4 +BM_UserCounter/threads:8 2055 ns 9933 ns 70344 16 40 24 8 +BM_UserCounter/threads:16 1610 ns 9946 ns 70720 32 80 48 16 +BM_UserCounter/threads:32 1192 ns 9948 ns 70496 64 160 96 32 +BM_UserCounter/threads:4 2506 ns 9949 ns 70332 8 20 12 4 +-------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------- +BM_Factorial 26 ns 26 ns 26392245 40320 +BM_Factorial/real_time 26 ns 26 ns 26494107 40320 +BM_CalculatePiRange/1 15 ns 15 ns 45571597 0 +BM_CalculatePiRange/8 74 ns 74 ns 9450212 3.28374 +BM_CalculatePiRange/64 595 ns 595 ns 1173901 3.15746 +BM_CalculatePiRange/512 4752 ns 4752 ns 147380 3.14355 +BM_CalculatePiRange/4k 37970 ns 37972 ns 18453 3.14184 +BM_CalculatePiRange/32k 303733 ns 303744 ns 2305 3.14162 +BM_CalculatePiRange/256k 2434095 ns 2434186 ns 288 3.1416 +BM_CalculatePiRange/1024k 9721140 ns 9721413 ns 71 3.14159 +BM_CalculatePi/threads:8 2255 ns 9943 ns 70936 +``` + +Note above the additional header printed when the benchmark changes from +``BM_UserCounter`` to ``BM_Factorial``. This is because ``BM_Factorial`` does +not have the same counter set as ``BM_UserCounter``. + + + +## Multithreaded Benchmarks + +In a multithreaded test (benchmark invoked by multiple threads simultaneously), +it is guaranteed that none of the threads will start until all have reached +the start of the benchmark loop, and all will have finished before any thread +exits the benchmark loop. 
(This behavior is also provided by the `KeepRunning()` +API) As such, any global setup or teardown can be wrapped in a check against the thread +index: + +```c++ +static void BM_MultiThreaded(benchmark::State& state) { + if (state.thread_index() == 0) { + // Setup code here. + } + for (auto _ : state) { + // Run the test as normal. + } + if (state.thread_index() == 0) { + // Teardown code here. + } +} +BENCHMARK(BM_MultiThreaded)->Threads(2); +``` + +If the benchmarked code itself uses threads and you want to compare it to +single-threaded code, you may want to use real-time ("wallclock") measurements +for latency comparisons: + +```c++ +BENCHMARK(BM_test)->Range(8, 8<<10)->UseRealTime(); +``` + +Without `UseRealTime`, CPU time is used by default. + + + +## CPU Timers + +By default, the CPU timer only measures the time spent by the main thread. +If the benchmark itself uses threads internally, this measurement may not +be what you are looking for. Instead, there is a way to measure the total +CPU usage of the process, by all the threads. + +```c++ +void callee(int i); + +static void MyMain(int size) { +#pragma omp parallel for + for(int i = 0; i < size; i++) + callee(i); +} + +static void BM_OpenMP(benchmark::State& state) { + for (auto _ : state) + MyMain(state.range(0)); +} + +// Measure the time spent by the main thread, use it to decide for how long to +// run the benchmark loop. Depending on the internal implementation detail may +// measure to anywhere from near-zero (the overhead spent before/after work +// handoff to worker thread[s]) to the whole single-thread time. +BENCHMARK(BM_OpenMP)->Range(8, 8<<10); + +// Measure the user-visible time, the wall clock (literally, the time that +// has passed on the clock on the wall), use it to decide for how long to +// run the benchmark loop. This will always be meaningful, an will match the +// time spent by the main thread in single-threaded case, in general decreasing +// with the number of internal threads doing the work. +BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->UseRealTime(); + +// Measure the total CPU consumption, use it to decide for how long to +// run the benchmark loop. This will always measure to no less than the +// time spent by the main thread in single-threaded case. +BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime(); + +// A mixture of the last two. Measure the total CPU consumption, but use the +// wall clock to decide for how long to run the benchmark loop. +BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime()->UseRealTime(); +``` + +### Controlling Timers + +Normally, the entire duration of the work loop (`for (auto _ : state) {}`) +is measured. But sometimes, it is necessary to do some work inside of +that loop, every iteration, but without counting that time to the benchmark time. +That is possible, although it is not recommended, since it has high overhead. + +{% raw %} +```c++ +static void BM_SetInsert_With_Timer_Control(benchmark::State& state) { + std::set data; + for (auto _ : state) { + state.PauseTiming(); // Stop timers. They will not count until they are resumed. + data = ConstructRandomSet(state.range(0)); // Do something that should not be measured + state.ResumeTiming(); // And resume timers. They are now counting again. + // The rest will be measured. 
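+    // Note that each PauseTiming()/ResumeTiming() pair adds overhead of its own,
+    // which is why this pattern is discouraged for very short iterations.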
+ for (int j = 0; j < state.range(1); ++j) + data.insert(RandomNumber()); + } +} +BENCHMARK(BM_SetInsert_With_Timer_Control)->Ranges({{1<<10, 8<<10}, {128, 512}}); +``` +{% endraw %} + + + +## Manual Timing + +For benchmarking something for which neither CPU time nor real-time are +correct or accurate enough, completely manual timing is supported using +the `UseManualTime` function. + +When `UseManualTime` is used, the benchmarked code must call +`SetIterationTime` once per iteration of the benchmark loop to +report the manually measured time. + +An example use case for this is benchmarking GPU execution (e.g. OpenCL +or CUDA kernels, OpenGL or Vulkan or Direct3D draw calls), which cannot +be accurately measured using CPU time or real-time. Instead, they can be +measured accurately using a dedicated API, and these measurement results +can be reported back with `SetIterationTime`. + +```c++ +static void BM_ManualTiming(benchmark::State& state) { + int microseconds = state.range(0); + std::chrono::duration sleep_duration { + static_cast(microseconds) + }; + + for (auto _ : state) { + auto start = std::chrono::high_resolution_clock::now(); + // Simulate some useful workload with a sleep + std::this_thread::sleep_for(sleep_duration); + auto end = std::chrono::high_resolution_clock::now(); + + auto elapsed_seconds = + std::chrono::duration_cast>( + end - start); + + state.SetIterationTime(elapsed_seconds.count()); + } +} +BENCHMARK(BM_ManualTiming)->Range(1, 1<<17)->UseManualTime(); +``` + + + +## Setting the Time Unit + +If a benchmark runs a few milliseconds it may be hard to visually compare the +measured times, since the output data is given in nanoseconds per default. In +order to manually set the time unit, you can specify it manually: + +```c++ +BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); +``` + + + +## Preventing Optimization + +To prevent a value or expression from being optimized away by the compiler +the `benchmark::DoNotOptimize(...)` and `benchmark::ClobberMemory()` +functions can be used. + +```c++ +static void BM_test(benchmark::State& state) { + for (auto _ : state) { + int x = 0; + for (int i=0; i < 64; ++i) { + benchmark::DoNotOptimize(x += i); + } + } +} +``` + +`DoNotOptimize()` forces the *result* of `` to be stored in either +memory or a register. For GNU based compilers it acts as read/write barrier +for global memory. More specifically it forces the compiler to flush pending +writes to memory and reload any other values as necessary. + +Note that `DoNotOptimize()` does not prevent optimizations on `` +in any way. `` may even be removed entirely when the result is already +known. For example: + +```c++ + /* Example 1: `` is removed entirely. */ + int foo(int x) { return x + 42; } + while (...) DoNotOptimize(foo(0)); // Optimized to DoNotOptimize(42); + + /* Example 2: Result of '' is only reused */ + int bar(int) __attribute__((const)); + while (...) DoNotOptimize(bar(0)); // Optimized to: + // int __result__ = bar(0); + // while (...) DoNotOptimize(__result__); +``` + +The second tool for preventing optimizations is `ClobberMemory()`. In essence +`ClobberMemory()` forces the compiler to perform all pending writes to global +memory. Memory managed by block scope objects must be "escaped" using +`DoNotOptimize(...)` before it can be clobbered. In the below example +`ClobberMemory()` prevents the call to `v.push_back(42)` from being optimized +away. 
+
+```c++
+static void BM_vector_push_back(benchmark::State& state) {
+  for (auto _ : state) {
+    std::vector<int> v;
+    v.reserve(1);
+    benchmark::DoNotOptimize(v.data()); // Allow v.data() to be clobbered.
+    v.push_back(42);
+    benchmark::ClobberMemory(); // Force 42 to be written to memory.
+  }
+}
+```
+
+Note that `ClobberMemory()` is only available for GNU or MSVC based compilers.
+
+
+
+## Statistics: Reporting the Mean, Median and Standard Deviation / Coefficient of Variation of Repeated Benchmarks
+
+By default each benchmark is run once and that single result is reported.
+However, benchmarks are often noisy and a single result may not be representative
+of the overall behavior. For this reason it's possible to repeatedly rerun the
+benchmark.
+
+The number of runs of each benchmark is specified globally by the
+`--benchmark_repetitions` flag or on a per-benchmark basis by calling
+`Repetitions` on the registered benchmark object. When a benchmark is run more
+than once, the mean, median, standard deviation and coefficient of variation
+of the runs will be reported.
+
+Additionally the `--benchmark_report_aggregates_only={true|false}`,
+`--benchmark_display_aggregates_only={true|false}` flags or
+`ReportAggregatesOnly(bool)`, `DisplayAggregatesOnly(bool)` functions can be
+used to change how repeated tests are reported. By default the result of each
+repeated run is reported. When the `report aggregates only` option is `true`,
+only the aggregates (i.e. mean, median, standard deviation and coefficient
+of variation, plus complexity measurements if they were requested) of the runs
+are reported, to both reporters: standard output (console) and the file.
+However, when only the `display aggregates only` option is `true`,
+only the aggregates are displayed in the standard output, while the file
+output still contains everything.
+Calling `ReportAggregatesOnly(bool)` / `DisplayAggregatesOnly(bool)` on a
+registered benchmark object overrides the value of the appropriate flag for that
+benchmark.
+
+
+
+## Custom Statistics
+
+While having these aggregates is nice, this may not be enough for everyone.
+For example you may want to know what the largest observation is, e.g. because
+you have some real-time constraints. This is easy. The following code will
+specify a custom statistic to be calculated, defined by a lambda function.
+
+```c++
+void BM_spin_empty(benchmark::State& state) {
+  for (auto _ : state) {
+    for (int x = 0; x < state.range(0); ++x) {
+      benchmark::DoNotOptimize(x);
+    }
+  }
+}
+
+BENCHMARK(BM_spin_empty)
+  ->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
+    return *(std::max_element(std::begin(v), std::end(v)));
+  })
+  ->Arg(512);
+```
+
+While the statistics are usually produced in time units,
+you can also produce percentages:
+
+```c++
+void BM_spin_empty(benchmark::State& state) {
+  for (auto _ : state) {
+    for (int x = 0; x < state.range(0); ++x) {
+      benchmark::DoNotOptimize(x);
+    }
+  }
+}
+
+BENCHMARK(BM_spin_empty)
+  ->ComputeStatistics("ratio", [](const std::vector<double>& v) -> double {
+    return v.front() / v.back();  // e.g. the ratio of the first to the last repetition
+  }, benchmark::StatisticUnit::kPercentage)
+  ->Arg(512);
+```
+
+
+
+## Using RegisterBenchmark(name, fn, args...)
+
+The `RegisterBenchmark(name, func, args...)` function provides an alternative
+way to create and register benchmarks.
+`RegisterBenchmark(name, func, args...)` creates, registers, and returns a
+pointer to a new benchmark with the specified `name` that invokes
+`func(st, args...)` where `st` is a `benchmark::State` object.
+
+Unlike the `BENCHMARK` registration macros, which can only be used at the global
+scope, `RegisterBenchmark` can be called anywhere. This allows for
+benchmark tests to be registered programmatically.
+
+Additionally `RegisterBenchmark` allows any callable object to be registered
+as a benchmark, including capturing lambdas and function objects.
+
+For example:
+```c++
+auto BM_test = [](benchmark::State& st, auto Inputs) { /* ... */ };
+
+int main(int argc, char** argv) {
+  for (auto& test_input : { /* ... */ })
+    benchmark::RegisterBenchmark(test_input.name(), BM_test, test_input);
+  benchmark::Initialize(&argc, argv);
+  benchmark::RunSpecifiedBenchmarks();
+  benchmark::Shutdown();
+}
+```
+
+
+
+## Exiting with an Error
+
+When errors caused by external influences, such as file I/O and network
+communication, occur within a benchmark, the
+`State::SkipWithError(const char* msg)` function can be used to skip that run
+of the benchmark and report the error. Note that only future iterations of
+`KeepRunning()` are skipped. For the ranged-for version of the benchmark loop,
+users must explicitly exit the loop, otherwise all iterations will be performed.
+Users may explicitly return to exit the benchmark immediately.
+
+The `SkipWithError(...)` function may be used at any point within the benchmark,
+including before and after the benchmark loop. Moreover, if `SkipWithError(...)`
+has been used, it is not required to reach the benchmark loop and one may return
+from the benchmark function early.
+
+For example:
+
+```c++
+static void BM_test(benchmark::State& state) {
+  auto resource = GetResource();
+  if (!resource.good()) {
+    state.SkipWithError("Resource is not good!");
+    // KeepRunning() loop will not be entered.
+  }
+  while (state.KeepRunning()) {
+    auto data = resource.read_data();
+    if (!resource.good()) {
+      state.SkipWithError("Failed to read data!");
+      break; // Needed to skip the rest of the iteration.
+    }
+    do_stuff(data);
+  }
+}
+
+static void BM_test_ranged_for(benchmark::State& state) {
+  auto resource = GetResource();
+  if (!resource.good()) {
+    state.SkipWithError("Resource is not good!");
+    return; // Early return is allowed when SkipWithError() has been used.
+  }
+  for (auto _ : state) {
+    auto data = resource.read_data();
+    if (!resource.good()) {
+      state.SkipWithError("Failed to read data!");
+      break; // REQUIRED to prevent all further iterations.
+    }
+    do_stuff(data);
+  }
+}
+```
+
+
+## A Faster KeepRunning Loop
+
+In C++11 mode, a range-based for loop should be used in preference to
+the `KeepRunning` loop for running the benchmarks. For example:
+
+```c++
+static void BM_Fast(benchmark::State &state) {
+  for (auto _ : state) {
+    FastOperation();
+  }
+}
+BENCHMARK(BM_Fast);
+```
+
+The reason the ranged-for loop is faster than using `KeepRunning` is
+that `KeepRunning` requires a memory load and store of the iteration count
+every iteration, whereas the ranged-for variant is able to keep the iteration
+count in a register.
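+
+For contrast, the equivalent benchmark written with the legacy loop would look
+like the following (a sketch only; `FastOperation()` is the same placeholder
+used in the example above):
+
+```c++
+static void BM_FastLegacy(benchmark::State& state) {
+  while (state.KeepRunning()) {
+    FastOperation();
+  }
+}
+BENCHMARK(BM_FastLegacy);
+```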
+ +For example, an empty inner loop of using the ranged-based for method looks like: + +```asm +# Loop Init + mov rbx, qword ptr [r14 + 104] + call benchmark::State::StartKeepRunning() + test rbx, rbx + je .LoopEnd +.LoopHeader: # =>This Inner Loop Header: Depth=1 + add rbx, -1 + jne .LoopHeader +.LoopEnd: +``` + +Compared to an empty `KeepRunning` loop, which looks like: + +```asm +.LoopHeader: # in Loop: Header=BB0_3 Depth=1 + cmp byte ptr [rbx], 1 + jne .LoopInit +.LoopBody: # =>This Inner Loop Header: Depth=1 + mov rax, qword ptr [rbx + 8] + lea rcx, [rax + 1] + mov qword ptr [rbx + 8], rcx + cmp rax, qword ptr [rbx + 104] + jb .LoopHeader + jmp .LoopEnd +.LoopInit: + mov rdi, rbx + call benchmark::State::StartKeepRunning() + jmp .LoopBody +.LoopEnd: +``` + +Unless C++03 compatibility is required, the ranged-for variant of writing +the benchmark loop should be preferred. + + + +## Disabling CPU Frequency Scaling + +If you see this error: + +``` +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +``` + +you might want to disable the CPU frequency scaling while running the benchmark: + +```bash +sudo cpupower frequency-set --governor performance +./mybench +sudo cpupower frequency-set --governor powersave +``` diff --git a/third-party/benchmark/include/benchmark/benchmark.h b/third-party/benchmark/include/benchmark/benchmark.h index 9b5480244d6f..c8ced387714d 100644 --- a/third-party/benchmark/include/benchmark/benchmark.h +++ b/third-party/benchmark/include/benchmark/benchmark.h @@ -34,7 +34,7 @@ static void BM_StringCopy(benchmark::State& state) { BENCHMARK(BM_StringCopy); // Augment the main() program to invoke benchmarks if specified -// via the --benchmarks command line flag. E.g., +// via the --benchmark_filter command line flag. E.g., // my_unittest --benchmark_filter=all // my_unittest --benchmark_filter=BM_StringCreation // my_unittest --benchmark_filter=String @@ -140,13 +140,13 @@ thread exits the loop body. As such, any global setup or teardown you want to do can be wrapped in a check against the thread index: static void BM_MultiThreaded(benchmark::State& state) { - if (state.thread_index == 0) { + if (state.thread_index() == 0) { // Setup code here. } for (auto _ : state) { // Run the test as normal. } - if (state.thread_index == 0) { + if (state.thread_index() == 0) { // Teardown code here. 
} } @@ -180,6 +180,7 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); #include #include #include +#include #include #include #include @@ -187,6 +188,7 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); #include #if defined(BENCHMARK_HAS_CXX11) +#include #include #include #include @@ -237,16 +239,24 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); #define BENCHMARK_INTERNAL_TOSTRING2(x) #x #define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x) +// clang-format off #if defined(__GNUC__) || defined(__clang__) #define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y) #define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) +#define BENCHMARK_DISABLE_DEPRECATED_WARNING \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("GCC diagnostic pop") #else #define BENCHMARK_BUILTIN_EXPECT(x, y) x #define BENCHMARK_DEPRECATED_MSG(msg) #define BENCHMARK_WARNING_MSG(msg) \ __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING( \ __LINE__) ") : warning note: " msg)) +#define BENCHMARK_DISABLE_DEPRECATED_WARNING +#define BENCHMARK_RESTORE_DEPRECATED_WARNING #endif +// clang-format on #if defined(__GNUC__) && !defined(__clang__) #define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) @@ -272,7 +282,6 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); namespace benchmark { class BenchmarkReporter; -class MemoryManager; void Initialize(int* argc, char** argv); void Shutdown(); @@ -281,11 +290,18 @@ void Shutdown(); // Returns true there is at least on unrecognized argument (i.e. 'argc' > 1). bool ReportUnrecognizedArguments(int argc, char** argv); +// Returns the current value of --benchmark_filter. +std::string GetBenchmarkFilter(); + // Generate a list of benchmarks matching the specified --benchmark_filter flag // and if --benchmark_list_tests is specified return after printing the name // of each matching benchmark. Otherwise run each matching benchmark and // report the results. // +// spec : Specify the benchmarks to run. If users do not specify this arg, +// then the value of FLAGS_benchmark_filter +// will be used. +// // The second and third overload use the specified 'display_reporter' and // 'file_reporter' respectively. 'file_reporter' will write to the file // specified @@ -294,9 +310,62 @@ bool ReportUnrecognizedArguments(int argc, char** argv); // // RETURNS: The number of matching benchmarks. size_t RunSpecifiedBenchmarks(); +size_t RunSpecifiedBenchmarks(std::string spec); + size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter); +size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, + std::string spec); + size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter); +size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, + BenchmarkReporter* file_reporter, + std::string spec); + +// If a MemoryManager is registered (via RegisterMemoryManager()), +// it can be used to collect and report allocation metrics for a run of the +// benchmark. +class MemoryManager { + public: + static const int64_t TombstoneValue; + + struct Result { + Result() + : num_allocs(0), + max_bytes_used(0), + total_allocated_bytes(TombstoneValue), + net_heap_growth(TombstoneValue) {} + + // The number of allocations made in total between Start and Stop. + int64_t num_allocs; + + // The peak memory use between Start and Stop. 
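+    // (Unlike the two fields below, this one defaults to 0 rather than
+    // TombstoneValue.)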
+ int64_t max_bytes_used; + + // The total memory allocated, in bytes, between Start and Stop. + // Init'ed to TombstoneValue if metric not available. + int64_t total_allocated_bytes; + + // The net changes in memory, in bytes, between Start and Stop. + // ie., total_allocated_bytes - total_deallocated_bytes. + // Init'ed to TombstoneValue if metric not available. + int64_t net_heap_growth; + }; + + virtual ~MemoryManager() {} + + // Implement this to start recording allocation information. + virtual void Start() = 0; + + // Implement this to stop recording and fill out the given Result structure. + BENCHMARK_DEPRECATED_MSG("Use Stop(Result&) instead") + virtual void Stop(Result* result) = 0; + + // FIXME(vyng): Make this pure virtual once we've migrated current users. + BENCHMARK_DISABLE_DEPRECATED_WARNING + virtual void Stop(Result& result) { Stop(&result); } + BENCHMARK_RESTORE_DEPRECATED_WARNING +}; // Register a MemoryManager instance that will be used to collect and report // allocation measurements for benchmark runs. @@ -327,6 +396,14 @@ BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams(); #define BENCHMARK_HAS_NO_INLINE_ASSEMBLY #endif +// Force the compiler to flush pending writes to global memory. Acts as an +// effective read/write barrier +#ifdef BENCHMARK_HAS_CXX11 +inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { + std::atomic_signal_fence(std::memory_order_acq_rel); +} +#endif + // The DoNotOptimize(...) function can be used to prevent a value or // expression from being optimized away by the compiler. This function is // intended to add little to no overhead. @@ -346,11 +423,11 @@ inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) { #endif } -// Force the compiler to flush pending writes to global memory. Acts as an -// effective read/write barrier +#ifndef BENCHMARK_HAS_CXX11 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { asm volatile("" : : : "memory"); } +#endif #elif defined(_MSC_VER) template inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { @@ -358,13 +435,15 @@ inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { _ReadWriteBarrier(); } +#ifndef BENCHMARK_HAS_CXX11 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { _ReadWriteBarrier(); } +#endif #else template inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { internal::UseCharPointer(&reinterpret_cast(value)); } -// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers +// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers, before C++11. #endif // This class is used for user-defined counters. @@ -374,27 +453,27 @@ class Counter { kDefaults = 0, // Mark the counter as a rate. It will be presented divided // by the duration of the benchmark. - kIsRate = 1U << 0U, + kIsRate = 1 << 0, // Mark the counter as a thread-average quantity. It will be // presented divided by the number of threads. - kAvgThreads = 1U << 1U, + kAvgThreads = 1 << 1, // Mark the counter as a thread-average rate. See above. kAvgThreadsRate = kIsRate | kAvgThreads, // Mark the counter as a constant value, valid/same for *every* iteration. // When reporting, it will be *multiplied* by the iteration count. - kIsIterationInvariant = 1U << 2U, + kIsIterationInvariant = 1 << 2, // Mark the counter as a constant rate. // When reporting, it will be *multiplied* by the iteration count // and then divided by the duration of the benchmark. 
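The relocated `MemoryManager` interface above gains `TombstoneValue`-initialized fields and a reference-taking `Stop(Result&)` while deprecating `Stop(Result*)`. A stub sketch of a conforming manager (it records nothing real; an actual implementation would hook the allocator):

```cpp
#include <benchmark/benchmark.h>

class NullMemoryManager : public benchmark::MemoryManager {
 public:
  void Start() override { /* begin recording allocations */ }

  // The pointer-based hook is still pure virtual in this revision, so a
  // subclass has to provide it; here it simply forwards to the new overload.
  void Stop(Result* result) override { Stop(*result); }

  // New reference-based overload. Fields that are not filled in keep their
  // TombstoneValue defaults, so reporters can tell "not measured" from zero.
  void Stop(Result& result) override {
    result.num_allocs = 0;
    result.max_bytes_used = 0;
  }
};

static NullMemoryManager null_manager;

static void BM_Alloc(benchmark::State& state) {
  for (auto _ : state) {
    int* p = new int(42);
    benchmark::DoNotOptimize(p);
    delete p;
  }
}
BENCHMARK(BM_Alloc);

int main(int argc, char** argv) {
  benchmark::RegisterMemoryManager(&null_manager);  // enable memory metrics
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
  benchmark::RegisterMemoryManager(nullptr);
  benchmark::Shutdown();
  return 0;
}
```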
kIsIterationInvariantRate = kIsRate | kIsIterationInvariant, // Mark the counter as a iteration-average quantity. // It will be presented divided by the number of iterations. - kAvgIterations = 1U << 3U, + kAvgIterations = 1 << 3, // Mark the counter as a iteration-average rate. See above. kAvgIterationsRate = kIsRate | kAvgIterations, // In the end, invert the result. This is always done last! - kInvert = 1U << 31U + kInvert = 1 << 31 }; enum OneK { @@ -412,7 +491,7 @@ class Counter { Counter(double v = 0., Flags f = kDefaults, OneK k = kIs1000) : value(v), flags(f), oneK(k) {} - BENCHMARK_ALWAYS_INLINE operator double const&() const { return value; } + BENCHMARK_ALWAYS_INLINE operator double const &() const { return value; } BENCHMARK_ALWAYS_INLINE operator double&() { return value; } }; @@ -439,6 +518,8 @@ enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda }; typedef uint64_t IterationCount; +enum StatisticUnit { kTime, kPercentage }; + // BigOFunc is passed to a benchmark in order to specify the asymptotic // computational complexity for the benchmark. typedef double(BigOFunc)(IterationCount); @@ -451,9 +532,11 @@ namespace internal { struct Statistics { std::string name_; StatisticsFunc* compute_; + StatisticUnit unit_; - Statistics(const std::string& name, StatisticsFunc* compute) - : name_(name), compute_(compute) {} + Statistics(const std::string& name, StatisticsFunc* compute, + StatisticUnit unit = kTime) + : name_(name), compute_(compute), unit_(unit) {} }; class BenchmarkInstance; @@ -656,6 +739,14 @@ class State { BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead") int64_t range_y() const { return range(1); } + // Number of threads concurrently executing the benchmark. + BENCHMARK_ALWAYS_INLINE + int threads() const { return threads_; } + + // Index of the executing thread. Values from [0, threads). + BENCHMARK_ALWAYS_INLINE + int thread_index() const { return thread_index_; } + BENCHMARK_ALWAYS_INLINE IterationCount iterations() const { if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) { @@ -664,8 +755,8 @@ class State { return max_iterations - total_iterations_ + batch_leftover_; } - private - : // items we expect on the first cache line (ie 64 bytes of the struct) + private: + // items we expect on the first cache line (ie 64 bytes of the struct) // When total_iterations_ is 0, KeepRunning() and friends will return false. // May be larger than max_iterations. IterationCount total_iterations_; @@ -683,7 +774,7 @@ class State { bool finished_; bool error_occurred_; - private: // items we don't need on the first cache line + // items we don't need on the first cache line std::vector range_; int64_t complexity_n_; @@ -691,10 +782,6 @@ class State { public: // Container for user-defined counters. UserCounters counters; - // Index of the executing thread. Values from [0, threads). - const int thread_index; - // Number of threads concurrently executing the benchmark. - const int threads; private: State(IterationCount max_iters, const std::vector& ranges, @@ -707,6 +794,10 @@ class State { // is_batch must be true unless n is 1. bool KeepRunningInternal(IterationCount n, bool is_batch); void FinishKeepRunning(); + + const int thread_index_; + const int threads_; + internal::ThreadTimer* const timer_; internal::ThreadManager* const manager_; internal::PerfCountersMeasurement* const perf_counters_measurement_; @@ -878,6 +969,23 @@ class Benchmark { return Ranges(ranges); } + // Have "setup" and/or "teardown" invoked once for every benchmark run. 
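The `Counter` flag constants above drop their unsigned-literal spelling but keep their semantics, and `State` now exposes `threads()`/`thread_index()` as accessors. A short sketch of user-defined counters with these flags (names are illustrative; `kIs1024` is the existing binary-prefix option from the same `OneK` enum, not shown in this hunk):

```cpp
#include <benchmark/benchmark.h>

#include <cstdint>

static void BM_Process(benchmark::State& state) {
  int64_t items = 0;
  for (auto _ : state) {
    benchmark::DoNotOptimize(items += 1);  // pretend to process one item
  }
  // kIsRate: reported as value / elapsed seconds, i.e. items per second.
  state.counters["items_per_sec"] = benchmark::Counter(
      static_cast<double>(items), benchmark::Counter::kIsRate);
  // kIsIterationInvariant: multiplied by the iteration count when reported.
  state.counters["bytes_per_iter"] =
      benchmark::Counter(64, benchmark::Counter::kIsIterationInvariant,
                         benchmark::Counter::kIs1024);
  // kAvgThreads: divided by the number of concurrently running threads.
  state.counters["per_thread"] =
      benchmark::Counter(static_cast<double>(state.threads()),
                         benchmark::Counter::kAvgThreads);
}
BENCHMARK(BM_Process)->Threads(2);
BENCHMARK_MAIN();
```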
+ // If the benchmark is multi-threaded (will run in k threads concurrently), + // the setup callback will be be invoked exactly once (not k times) before + // each run with k threads. Time allowing (e.g. for a short benchmark), there + // may be multiple such runs per benchmark, each run with its own + // "setup"/"teardown". + // + // If the benchmark uses different size groups of threads (e.g. via + // ThreadRange), the above will be true for each size group. + // + // The callback will be passed a State object, which includes the number + // of threads, thread-index, benchmark arguments, etc. + // + // The callback must not be NULL or self-deleting. + Benchmark* Setup(void (*setup)(const benchmark::State&)); + Benchmark* Teardown(void (*teardown)(const benchmark::State&)); + // Pass this benchmark object to *func, which can customize // the benchmark by calling various methods like Arg, Args, // Threads, etc. @@ -946,7 +1054,9 @@ class Benchmark { Benchmark* Complexity(BigOFunc* complexity); // Add this statistics to be computed over all the values of benchmark run - Benchmark* ComputeStatistics(std::string name, StatisticsFunc* statistics); + Benchmark* ComputeStatistics(const std::string& name, + StatisticsFunc* statistics, + StatisticUnit unit = kTime); // Support for running multiple copies of the same benchmark concurrently // in multiple threads. This may be useful when measuring the scaling @@ -1008,6 +1118,10 @@ class Benchmark { std::vector statistics_; std::vector thread_counts_; + typedef void (*callback_function)(const benchmark::State&); + callback_function setup_; + callback_function teardown_; + Benchmark& operator=(Benchmark const&); }; @@ -1056,8 +1170,7 @@ class LambdaBenchmark : public Benchmark { LambdaBenchmark(LambdaBenchmark const&) = delete; - private: - template + template // NOLINTNEXTLINE(readability-redundant-declaration) friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&); Lambda lambda_; @@ -1131,22 +1244,37 @@ class Fixture : public internal::Benchmark { #endif // Helpers for generating unique variable names +#ifdef BENCHMARK_HAS_CXX11 +#define BENCHMARK_PRIVATE_NAME(...) \ + BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, \ + __VA_ARGS__) +#else #define BENCHMARK_PRIVATE_NAME(n) \ BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, n) +#endif // BENCHMARK_HAS_CXX11 + #define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c) #define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c // Helper for concatenation with macro name expansion #define BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method) \ - BaseClass##_##Method##_Benchmark + BaseClass##_##Method##_Benchmark #define BENCHMARK_PRIVATE_DECLARE(n) \ static ::benchmark::internal::Benchmark* BENCHMARK_PRIVATE_NAME(n) \ BENCHMARK_UNUSED +#ifdef BENCHMARK_HAS_CXX11 +#define BENCHMARK(...) 
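The new `Setup()`/`Teardown()` registration hooks documented above run once per benchmark run rather than once per thread. A minimal sketch of registering them (function and data names are illustrative):

```cpp
#include <benchmark/benchmark.h>

#include <vector>

static std::vector<int> g_data;  // hypothetical shared input

// Called once before each run, with a State carrying the run's arguments.
static void DoSetup(const benchmark::State& state) {
  g_data.assign(static_cast<size_t>(state.range(0)), 1);
}

// Called once after each run.
static void DoTeardown(const benchmark::State&) { g_data.clear(); }

static void BM_Sum(benchmark::State& state) {
  for (auto _ : state) {
    long sum = 0;
    for (int v : g_data) sum += v;
    benchmark::DoNotOptimize(sum);
  }
}

BENCHMARK(BM_Sum)
    ->Arg(1 << 10)
    ->Arg(1 << 20)
    ->Setup(DoSetup)
    ->Teardown(DoTeardown)
    ->Threads(2);
BENCHMARK_MAIN();
```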
\ + BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \ + (::benchmark::internal::RegisterBenchmarkInternal( \ + new ::benchmark::internal::FunctionBenchmark(#__VA_ARGS__, \ + &__VA_ARGS__))) +#else #define BENCHMARK(n) \ BENCHMARK_PRIVATE_DECLARE(n) = \ (::benchmark::internal::RegisterBenchmarkInternal( \ new ::benchmark::internal::FunctionBenchmark(#n, n))) +#endif // BENCHMARK_HAS_CXX11 // Old-style macros #define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a)) @@ -1210,7 +1338,7 @@ class Fixture : public internal::Benchmark { #define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ class BaseClass##_##Method##_Benchmark : public BaseClass { \ public: \ - BaseClass##_##Method##_Benchmark() : BaseClass() { \ + BaseClass##_##Method##_Benchmark() { \ this->SetName(#BaseClass "/" #Method); \ } \ \ @@ -1221,7 +1349,7 @@ class Fixture : public internal::Benchmark { #define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ class BaseClass##_##Method##_Benchmark : public BaseClass { \ public: \ - BaseClass##_##Method##_Benchmark() : BaseClass() { \ + BaseClass##_##Method##_Benchmark() { \ this->SetName(#BaseClass "<" #a ">/" #Method); \ } \ \ @@ -1232,7 +1360,7 @@ class Fixture : public internal::Benchmark { #define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ class BaseClass##_##Method##_Benchmark : public BaseClass { \ public: \ - BaseClass##_##Method##_Benchmark() : BaseClass() { \ + BaseClass##_##Method##_Benchmark() { \ this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \ } \ \ @@ -1244,7 +1372,7 @@ class Fixture : public internal::Benchmark { #define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, ...) \ class BaseClass##_##Method##_Benchmark : public BaseClass<__VA_ARGS__> { \ public: \ - BaseClass##_##Method##_Benchmark() : BaseClass<__VA_ARGS__>() { \ + BaseClass##_##Method##_Benchmark() { \ this->SetName(#BaseClass "<" #__VA_ARGS__ ">/" #Method); \ } \ \ @@ -1334,11 +1462,7 @@ struct CPUInfo { int num_sharing; }; - enum Scaling { - UNKNOWN, - ENABLED, - DISABLED - }; + enum Scaling { UNKNOWN, ENABLED, DISABLED }; int num_cpus; Scaling scaling; @@ -1402,6 +1526,7 @@ class BenchmarkReporter { Run() : run_type(RT_Iteration), + aggregate_unit(kTime), error_occurred(false), iterations(1), threads(1), @@ -1414,10 +1539,8 @@ class BenchmarkReporter { complexity_n(0), report_big_o(false), report_rms(false), - counters(), - has_memory_result(false), - allocs_per_iter(0.0), - max_bytes_used(0) {} + memory_result(NULL), + allocs_per_iter(0.0) {} std::string benchmark_name() const; BenchmarkName run_name; @@ -1425,6 +1548,7 @@ class BenchmarkReporter { int64_t per_family_instance_index; RunType run_type; std::string aggregate_name; + StatisticUnit aggregate_unit; std::string report_label; // Empty if not set by benchmark. bool error_occurred; std::string error_message; @@ -1467,9 +1591,8 @@ class BenchmarkReporter { UserCounters counters; // Memory metrics. 
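The fixture helper macros above now rely on the implicitly generated constructor of the `BaseClass##_##Method##_Benchmark` class; user-facing fixture code is unchanged. A sketch using the existing `BENCHMARK_DEFINE_F`/`BENCHMARK_REGISTER_F` macros from the same header (fixture name and workload are illustrative):

```cpp
#include <benchmark/benchmark.h>

#include <vector>

class VectorFixture : public benchmark::Fixture {
 public:
  void SetUp(const benchmark::State& state) override {
    data.assign(static_cast<size_t>(state.range(0)), 7);
  }
  void TearDown(const benchmark::State&) override { data.clear(); }

  std::vector<int> data;
};

BENCHMARK_DEFINE_F(VectorFixture, Iterate)(benchmark::State& state) {
  for (auto _ : state) {
    long sum = 0;
    for (int v : data) sum += v;
    benchmark::DoNotOptimize(sum);
  }
}
BENCHMARK_REGISTER_F(VectorFixture, Iterate)->Arg(1 << 12);
BENCHMARK_MAIN();
```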
- bool has_memory_result; + const MemoryManager::Result* memory_result; double allocs_per_iter; - int64_t max_bytes_used; }; struct PerFamilyRunReports { @@ -1552,10 +1675,7 @@ class ConsoleReporter : public BenchmarkReporter { OO_Defaults = OO_ColorTabular }; explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults) - : output_options_(opts_), - name_field_width_(0), - prev_counters_(), - printed_header_(false) {} + : output_options_(opts_), name_field_width_(0), printed_header_(false) {} virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; virtual void ReportRuns(const std::vector& reports) BENCHMARK_OVERRIDE; @@ -1598,29 +1718,6 @@ class BENCHMARK_DEPRECATED_MSG( std::set user_counter_names_; }; -// If a MemoryManager is registered, it can be used to collect and report -// allocation metrics for a run of the benchmark. -class MemoryManager { - public: - struct Result { - Result() : num_allocs(0), max_bytes_used(0) {} - - // The number of allocations made in total between Start and Stop. - int64_t num_allocs; - - // The peak memory use between Start and Stop. - int64_t max_bytes_used; - }; - - virtual ~MemoryManager() {} - - // Implement this to start recording allocation information. - virtual void Start() = 0; - - // Implement this to stop recording and fill out the given Result structure. - virtual void Stop(Result* result) = 0; -}; - inline const char* GetTimeUnitString(TimeUnit unit) { switch (unit) { case kSecond: @@ -1649,6 +1746,20 @@ inline double GetTimeUnitMultiplier(TimeUnit unit) { BENCHMARK_UNREACHABLE(); } +// Creates a list of integer values for the given range and multiplier. +// This can be used together with ArgsProduct() to allow multiple ranges +// with different multiplers. +// Example: +// ArgsProduct({ +// CreateRange(0, 1024, /*multi=*/32), +// CreateRange(0, 100, /*multi=*/4), +// CreateDenseRange(0, 4, /*step=*/1), +// }); +std::vector CreateRange(int64_t lo, int64_t hi, int multi); + +// Creates a list of integer values for the given range and step. +std::vector CreateDenseRange(int64_t start, int64_t limit, int step); + } // namespace benchmark #endif // BENCHMARK_BENCHMARK_H_ diff --git a/third-party/benchmark/requirements.txt b/third-party/benchmark/requirements.txt index 85e898604068..e451894e2356 100644 --- a/third-party/benchmark/requirements.txt +++ b/third-party/benchmark/requirements.txt @@ -1,2 +1,3 @@ numpy == 1.19.4 scipy == 1.5.4 +pandas == 1.1.5 diff --git a/third-party/benchmark/setup.py b/third-party/benchmark/setup.py index 5cdab10cf77c..4eaccf849801 100644 --- a/third-party/benchmark/setup.py +++ b/third-party/benchmark/setup.py @@ -1,5 +1,6 @@ import os import posixpath +import platform import re import shutil import sys @@ -89,6 +90,8 @@ class BuildBazelExtension(build_ext.build_ext): # Link with python*.lib. 
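The `CreateRange()` and `CreateDenseRange()` helpers declared above pair with the existing `ArgsProduct()` to build multi-dimensional argument grids, exactly as the header comment illustrates. A runnable sketch (benchmark name and ranges are illustrative):

```cpp
#include <benchmark/benchmark.h>

static void BM_Grid(benchmark::State& state) {
  const int64_t rows = state.range(0);
  const int64_t cols = state.range(1);
  const int64_t depth = state.range(2);
  for (auto _ : state) {
    benchmark::DoNotOptimize(rows * cols * depth);
  }
}

// Each axis can use its own multiplier or step; ArgsProduct() then takes the
// cross product of the three argument lists.
BENCHMARK(BM_Grid)->ArgsProduct({
    benchmark::CreateRange(8, 128, /*multi=*/2),    // 8, 16, 32, 64, 128
    benchmark::CreateRange(1, 4, /*multi=*/2),      // 1, 2, 4
    benchmark::CreateDenseRange(0, 4, /*step=*/1),  // 0, 1, 2, 3, 4
});
BENCHMARK_MAIN();
```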
for library_dir in self.library_dirs: bazel_argv.append("--linkopt=/LIBPATH:" + library_dir) + elif sys.platform == "darwin" and platform.machine() == "x86_64": + bazel_argv.append("--macos_minimum_os=10.9") self.spawn(bazel_argv) diff --git a/third-party/benchmark/src/CMakeLists.txt b/third-party/benchmark/src/CMakeLists.txt index a6c8e9a7a0b7..dd82e9761bd4 100644 --- a/third-party/benchmark/src/CMakeLists.txt +++ b/third-party/benchmark/src/CMakeLists.txt @@ -25,38 +25,32 @@ set_target_properties(benchmark PROPERTIES SOVERSION ${GENERIC_LIB_SOVERSION} ) target_include_directories(benchmark PUBLIC - $ - ) + $) # libpfm, if available if (HAVE_LIBPFM) - target_link_libraries(benchmark libpfm.a) + target_link_libraries(benchmark PRIVATE pfm) add_definitions(-DHAVE_LIBPFM) endif() # Link threads. -target_link_libraries(benchmark ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) -find_library(LIBRT rt) -if(LIBRT) - target_link_libraries(benchmark ${LIBRT}) -endif() +target_link_libraries(benchmark PRIVATE Threads::Threads) + +target_link_libraries(benchmark PRIVATE ${BENCHMARK_CXX_LIBRARIES}) + +if(HAVE_LIB_RT) + target_link_libraries(benchmark PRIVATE rt) +endif(HAVE_LIB_RT) -if(CMAKE_BUILD_TYPE) - string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPER) -endif() -if(NOT CMAKE_THREAD_LIBS_INIT AND "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}" MATCHES ".*-fsanitize=[^ ]*address.*") - message(WARNING "CMake's FindThreads.cmake did not fail, but CMAKE_THREAD_LIBS_INIT ended up being empty. This was fixed in https://github.com/Kitware/CMake/commit/d53317130e84898c5328c237186dbd995aaf1c12 Let's guess that -pthread is sufficient.") - target_link_libraries(benchmark -pthread) -endif() # We need extra libraries on Windows if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") - target_link_libraries(benchmark shlwapi) + target_link_libraries(benchmark PRIVATE shlwapi) endif() # We need extra libraries on Solaris if(${CMAKE_SYSTEM_NAME} MATCHES "SunOS") - target_link_libraries(benchmark kstat) + target_link_libraries(benchmark PRIVATE kstat) endif() # Benchmark main library @@ -67,33 +61,44 @@ set_target_properties(benchmark_main PROPERTIES VERSION ${GENERIC_LIB_VERSION} SOVERSION ${GENERIC_LIB_SOVERSION} ) -target_include_directories(benchmark PUBLIC - $ - ) -target_link_libraries(benchmark_main benchmark::benchmark) +target_link_libraries(benchmark_main PUBLIC benchmark::benchmark) -set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated") +set(generated_dir "${PROJECT_BINARY_DIR}") set(version_config "${generated_dir}/${PROJECT_NAME}ConfigVersion.cmake") set(project_config "${generated_dir}/${PROJECT_NAME}Config.cmake") set(pkg_config "${generated_dir}/${PROJECT_NAME}.pc") +set(targets_to_export benchmark benchmark_main) set(targets_export_name "${PROJECT_NAME}Targets") set(namespace "${PROJECT_NAME}::") include(CMakePackageConfigHelpers) + +configure_package_config_file ( + ${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in + ${project_config} + INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} + NO_SET_AND_CHECK_MACRO + NO_CHECK_REQUIRED_COMPONENTS_MACRO +) write_basic_package_version_file( "${version_config}" VERSION ${GENERIC_LIB_VERSION} COMPATIBILITY SameMajorVersion ) -configure_file("${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in" "${project_config}" @ONLY) configure_file("${PROJECT_SOURCE_DIR}/cmake/benchmark.pc.in" "${pkg_config}" @ONLY) +export ( + TARGETS ${targets_to_export} + NAMESPACE "${namespace}" + FILE ${generated_dir}/${targets_export_name}.cmake +) + if 
(BENCHMARK_ENABLE_INSTALL) # Install target (will install the library to specified CMAKE_INSTALL_PREFIX variable) install( - TARGETS benchmark benchmark_main + TARGETS ${targets_to_export} EXPORT ${targets_export_name} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} @@ -118,3 +123,37 @@ if (BENCHMARK_ENABLE_INSTALL) NAMESPACE "${namespace}" DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}") endif() + +if (BENCHMARK_ENABLE_DOXYGEN) + find_package(Doxygen REQUIRED) + set(DOXYGEN_QUIET YES) + set(DOXYGEN_RECURSIVE YES) + set(DOXYGEN_GENERATE_HTML YES) + set(DOXYGEN_GENERATE_MAN NO) + set(DOXYGEN_MARKDOWN_SUPPORT YES) + set(DOXYGEN_BUILTIN_STL_SUPPORT YES) + set(DOXYGEN_EXTRACT_PACKAGE YES) + set(DOXYGEN_EXTRACT_STATIC YES) + set(DOXYGEN_SHOW_INCLUDE_FILES YES) + set(DOXYGEN_BINARY_TOC YES) + set(DOXYGEN_TOC_EXPAND YES) + set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "index.md") + doxygen_add_docs(benchmark_doxygen + docs + include + src + ALL + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + COMMENT "Building documentation with Doxygen.") + if (BENCHMARK_ENABLE_INSTALL AND BENCHMARK_INSTALL_DOCS) + install( + DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/html/" + DESTINATION ${CMAKE_INSTALL_DOCDIR}) + endif() +else() + if (BENCHMARK_ENABLE_INSTALL AND BENCHMARK_INSTALL_DOCS) + install( + DIRECTORY "${PROJECT_SOURCE_DIR}/docs/" + DESTINATION ${CMAKE_INSTALL_DOCDIR}) + endif() +endif() \ No newline at end of file diff --git a/third-party/benchmark/src/benchmark.cc b/third-party/benchmark/src/benchmark.cc index 89f64967bf18..cedeee31c79e 100644 --- a/third-party/benchmark/src/benchmark.cc +++ b/third-party/benchmark/src/benchmark.cc @@ -56,75 +56,75 @@ #include "thread_manager.h" #include "thread_timer.h" +namespace benchmark { // Print a list of benchmarks. This option overrides all other options. -DEFINE_bool(benchmark_list_tests, false); +BM_DEFINE_bool(benchmark_list_tests, false); // A regular expression that specifies the set of benchmarks to execute. If // this flag is empty, or if this flag is the string \"all\", all benchmarks // linked into the binary are run. -DEFINE_string(benchmark_filter, "."); +BM_DEFINE_string(benchmark_filter, ""); // Minimum number of seconds we should run benchmark before results are // considered significant. For cpu-time based tests, this is the lower bound // on the total cpu time used by all threads that make up the test. For // real-time based tests, this is the lower bound on the elapsed time of the // benchmark execution, regardless of number of threads. -DEFINE_double(benchmark_min_time, 0.5); +BM_DEFINE_double(benchmark_min_time, 0.5); // The number of runs of each benchmark. If greater than 1, the mean and // standard deviation of the runs will be reported. -DEFINE_int32(benchmark_repetitions, 1); +BM_DEFINE_int32(benchmark_repetitions, 1); // If set, enable random interleaving of repetitions of all benchmarks. // See http://github.com/google/benchmark/issues/1051 for details. -DEFINE_bool(benchmark_enable_random_interleaving, false); +BM_DEFINE_bool(benchmark_enable_random_interleaving, false); // Report the result of each benchmark repetitions. When 'true' is specified // only the mean, standard deviation, and other statistics are reported for // repeated benchmarks. Affects all reporters. -DEFINE_bool(benchmark_report_aggregates_only, false); +BM_DEFINE_bool(benchmark_report_aggregates_only, false); // Display the result of each benchmark repetitions. 
When 'true' is specified // only the mean, standard deviation, and other statistics are displayed for // repeated benchmarks. Unlike benchmark_report_aggregates_only, only affects // the display reporter, but *NOT* file reporter, which will still contain // all the output. -DEFINE_bool(benchmark_display_aggregates_only, false); +BM_DEFINE_bool(benchmark_display_aggregates_only, false); // The format to use for console output. // Valid values are 'console', 'json', or 'csv'. -DEFINE_string(benchmark_format, "console"); +BM_DEFINE_string(benchmark_format, "console"); // The format to use for file output. // Valid values are 'console', 'json', or 'csv'. -DEFINE_string(benchmark_out_format, "json"); +BM_DEFINE_string(benchmark_out_format, "json"); // The file to write additional output to. -DEFINE_string(benchmark_out, ""); +BM_DEFINE_string(benchmark_out, ""); // Whether to use colors in the output. Valid values: // 'true'/'yes'/1, 'false'/'no'/0, and 'auto'. 'auto' means to use colors if // the output is being sent to a terminal and the TERM environment variable is // set to a terminal type that supports colors. -DEFINE_string(benchmark_color, "auto"); +BM_DEFINE_string(benchmark_color, "auto"); // Whether to use tabular format when printing user counters to the console. // Valid values: 'true'/'yes'/1, 'false'/'no'/0. Defaults to false. -DEFINE_bool(benchmark_counters_tabular, false); - -// The level of verbose logging to output -DEFINE_int32(v, 0); +BM_DEFINE_bool(benchmark_counters_tabular, false); // List of additional perf counters to collect, in libpfm format. For more // information about libpfm: https://man7.org/linux/man-pages/man3/libpfm.3.html -DEFINE_string(benchmark_perf_counters, ""); - -namespace benchmark { -namespace internal { +BM_DEFINE_string(benchmark_perf_counters, ""); // Extra context to include in the output formatted as comma-separated key-value // pairs. Kept internal as it's only used for parsing from env/command line. -DEFINE_kvpairs(benchmark_context, {}); +BM_DEFINE_kvpairs(benchmark_context, {}); + +// The level of verbose logging to output +BM_DEFINE_int32(v, 0); + +namespace internal { std::map* global_context = nullptr; @@ -145,14 +145,14 @@ State::State(IterationCount max_iters, const std::vector& ranges, error_occurred_(false), range_(ranges), complexity_n_(0), - counters(), - thread_index(thread_i), - threads(n_threads), + thread_index_(thread_i), + threads_(n_threads), timer_(timer), manager_(manager), perf_counters_measurement_(perf_counters_measurement) { - CHECK(max_iterations != 0) << "At least one iteration must be run"; - CHECK_LT(thread_index, threads) << "thread_index must be less than threads"; + BM_CHECK(max_iterations != 0) << "At least one iteration must be run"; + BM_CHECK_LT(thread_index_, threads_) + << "thread_index must be less than threads"; // Note: The use of offsetof below is technically undefined until C++17 // because State is not a standard layout type. 
However, all compilers @@ -181,21 +181,21 @@ State::State(IterationCount max_iters, const std::vector& ranges, void State::PauseTiming() { // Add in time accumulated so far - CHECK(started_ && !finished_ && !error_occurred_); + BM_CHECK(started_ && !finished_ && !error_occurred_); timer_->StopTimer(); if (perf_counters_measurement_) { auto measurements = perf_counters_measurement_->StopAndGetMeasurements(); for (const auto& name_and_measurement : measurements) { auto name = name_and_measurement.first; auto measurement = name_and_measurement.second; - CHECK_EQ(counters[name], 0.0); + BM_CHECK_EQ(counters[name], 0.0); counters[name] = Counter(measurement, Counter::kAvgIterations); } } } void State::ResumeTiming() { - CHECK(started_ && !finished_ && !error_occurred_); + BM_CHECK(started_ && !finished_ && !error_occurred_); timer_->StartTimer(); if (perf_counters_measurement_) { perf_counters_measurement_->Start(); @@ -203,7 +203,7 @@ void State::ResumeTiming() { } void State::SkipWithError(const char* msg) { - CHECK(msg); + BM_CHECK(msg); error_occurred_ = true; { MutexLock l(manager_->GetBenchmarkMutex()); @@ -226,7 +226,7 @@ void State::SetLabel(const char* label) { } void State::StartKeepRunning() { - CHECK(!started_ && !finished_); + BM_CHECK(!started_ && !finished_); started_ = true; total_iterations_ = error_occurred_ ? 0 : max_iterations; manager_->StartStopBarrier(); @@ -234,7 +234,7 @@ void State::StartKeepRunning() { } void State::FinishKeepRunning() { - CHECK(started_ && (!finished_ || error_occurred_)); + BM_CHECK(started_ && (!finished_ || error_occurred_)); if (!error_occurred_) { PauseTiming(); } @@ -282,7 +282,7 @@ void RunBenchmarks(const std::vector& benchmarks, BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter) { // Note the file_reporter can be null. - CHECK(display_reporter != nullptr); + BM_CHECK(display_reporter != nullptr); // Determine the width of the name field using a minimum width of 10. 
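The assertions inside `PauseTiming()`, `ResumeTiming()`, `SkipWithError()` and `StartKeepRunning()` above switch to the `BM_CHECK` spelling; their user-visible behaviour is unchanged (in particular, a run skipped with `SkipWithError()` executes zero loop iterations, per `StartKeepRunning()`). A hedged usage sketch (the file path and buffer size are purely illustrative):

```cpp
#include <benchmark/benchmark.h>

#include <cstdio>
#include <vector>

static void BM_WriteWithPauses(benchmark::State& state) {
  std::FILE* f = std::fopen("/dev/null", "w");  // POSIX path, illustration only
  if (f == nullptr) {
    // Marks the run as failed; the loop below then runs zero iterations.
    state.SkipWithError("could not open output file");
  }
  for (auto _ : state) {
    state.PauseTiming();  // exclude per-iteration buffer setup
    std::vector<char> buf(4096, 'x');
    state.ResumeTiming();
    benchmark::DoNotOptimize(std::fwrite(buf.data(), 1, buf.size(), f));
  }
  if (f != nullptr) std::fclose(f);
}
BENCHMARK(BM_WriteWithPauses);
BENCHMARK_MAIN();
```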
bool might_have_aggregates = FLAGS_benchmark_repetitions > 1; @@ -328,7 +328,7 @@ void RunBenchmarks(const std::vector& benchmarks, } assert(runners.size() == benchmarks.size() && "Unexpected runner count."); - std::vector repetition_indices; + std::vector repetition_indices; repetition_indices.reserve(num_repetitions_total); for (size_t runner_index = 0, num_runners = runners.size(); runner_index != num_runners; ++runner_index) { @@ -362,7 +362,7 @@ void RunBenchmarks(const std::vector& benchmarks, additional_run_stats.begin(), additional_run_stats.end()); per_family_reports.erase( - (int)reports_for_family->Runs.front().family_index); + static_cast(reports_for_family->Runs.front().family_index)); } } @@ -377,10 +377,7 @@ void RunBenchmarks(const std::vector& benchmarks, // Disable deprecated warnings temporarily because we need to reference // CSVReporter but don't want to trigger -Werror=-Wdeprecated-declarations -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif +BENCHMARK_DISABLE_DEPRECATED_WARNING std::unique_ptr CreateReporter( std::string const& name, ConsoleReporter::OutputOptions output_opts) { @@ -397,9 +394,7 @@ std::unique_ptr CreateReporter( } } -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif +BENCHMARK_RESTORE_DEPRECATED_WARNING } // end namespace @@ -434,16 +429,32 @@ ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color) { } // end namespace internal size_t RunSpecifiedBenchmarks() { - return RunSpecifiedBenchmarks(nullptr, nullptr); + return RunSpecifiedBenchmarks(nullptr, nullptr, FLAGS_benchmark_filter); +} + +size_t RunSpecifiedBenchmarks(std::string spec) { + return RunSpecifiedBenchmarks(nullptr, nullptr, std::move(spec)); } size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter) { - return RunSpecifiedBenchmarks(display_reporter, nullptr); + return RunSpecifiedBenchmarks(display_reporter, nullptr, + FLAGS_benchmark_filter); +} + +size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, + std::string spec) { + return RunSpecifiedBenchmarks(display_reporter, nullptr, std::move(spec)); } size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter) { - std::string spec = FLAGS_benchmark_filter; + return RunSpecifiedBenchmarks(display_reporter, file_reporter, + FLAGS_benchmark_filter); +} + +size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, + BenchmarkReporter* file_reporter, + std::string spec) { if (spec.empty() || spec == "all") spec = "."; // Regexp that matches all benchmarks @@ -499,6 +510,8 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, return benchmarks.size(); } +std::string GetBenchmarkFilter() { return FLAGS_benchmark_filter; } + void RegisterMemoryManager(MemoryManager* manager) { internal::memory_manager = manager; } @@ -530,6 +543,7 @@ void PrintUsageAndExit() { " [--benchmark_out_format=]\n" " [--benchmark_color={auto|true|false}]\n" " [--benchmark_counters_tabular={true|false}]\n" + " [--benchmark_perf_counters=,...]\n" " [--benchmark_context==,...]\n" " [--v=]\n"); exit(0); @@ -558,9 +572,6 @@ void ParseCommandLineFlags(int* argc, char** argv) { ParseStringFlag(argv[i], "benchmark_out_format", &FLAGS_benchmark_out_format) || ParseStringFlag(argv[i], "benchmark_color", &FLAGS_benchmark_color) || - // "color_print" is the deprecated name for "benchmark_color". - // TODO: Remove this. 
- ParseStringFlag(argv[i], "color_print", &FLAGS_benchmark_color) || ParseBoolFlag(argv[i], "benchmark_counters_tabular", &FLAGS_benchmark_counters_tabular) || ParseStringFlag(argv[i], "benchmark_perf_counters", @@ -602,9 +613,7 @@ void Initialize(int* argc, char** argv) { internal::LogLevel() = FLAGS_v; } -void Shutdown() { - delete internal::global_context; -} +void Shutdown() { delete internal::global_context; } bool ReportUnrecognizedArguments(int argc, char** argv) { for (int i = 1; i < argc; ++i) { diff --git a/third-party/benchmark/src/benchmark_api_internal.cc b/third-party/benchmark/src/benchmark_api_internal.cc index 89da519afc8c..4de36e3c8ba3 100644 --- a/third-party/benchmark/src/benchmark_api_internal.cc +++ b/third-party/benchmark/src/benchmark_api_internal.cc @@ -78,6 +78,9 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx, if (!benchmark_.thread_counts_.empty()) { name_.threads = StrFormat("threads:%d", threads_); } + + setup_ = benchmark_.setup_; + teardown_ = benchmark_.teardown_; } State BenchmarkInstance::Run( @@ -90,5 +93,20 @@ State BenchmarkInstance::Run( return st; } +void BenchmarkInstance::Setup() const { + if (setup_) { + State st(/*iters*/ 1, args_, /*thread_id*/ 0, threads_, nullptr, nullptr, + nullptr); + setup_(st); + } +} + +void BenchmarkInstance::Teardown() const { + if (teardown_) { + State st(/*iters*/ 1, args_, /*thread_id*/ 0, threads_, nullptr, nullptr, + nullptr); + teardown_(st); + } +} } // namespace internal } // namespace benchmark diff --git a/third-party/benchmark/src/benchmark_api_internal.h b/third-party/benchmark/src/benchmark_api_internal.h index 9296b7d2c816..94c2b2972bb1 100644 --- a/third-party/benchmark/src/benchmark_api_internal.h +++ b/third-party/benchmark/src/benchmark_api_internal.h @@ -32,12 +32,14 @@ class BenchmarkInstance { bool use_real_time() const { return use_real_time_; } bool use_manual_time() const { return use_manual_time_; } BigO complexity() const { return complexity_; } - BigOFunc& complexity_lambda() const { return *complexity_lambda_; } + BigOFunc* complexity_lambda() const { return complexity_lambda_; } const std::vector& statistics() const { return statistics_; } int repetitions() const { return repetitions_; } double min_time() const { return min_time_; } IterationCount iterations() const { return iterations_; } int threads() const { return threads_; } + void Setup() const; + void Teardown() const; State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer, internal::ThreadManager* manager, @@ -62,6 +64,10 @@ class BenchmarkInstance { double min_time_; IterationCount iterations_; int threads_; // Number of concurrent threads to us + + typedef void (*callback_function)(const benchmark::State&); + callback_function setup_ = nullptr; + callback_function teardown_ = nullptr; }; bool FindBenchmarksInternal(const std::string& re, diff --git a/third-party/benchmark/src/benchmark_register.cc b/third-party/benchmark/src/benchmark_register.cc index 574462220e7c..61a0c26178e3 100644 --- a/third-party/benchmark/src/benchmark_register.cc +++ b/third-party/benchmark/src/benchmark_register.cc @@ -111,7 +111,7 @@ void BenchmarkFamilies::ClearBenchmarks() { bool BenchmarkFamilies::FindBenchmarks( std::string spec, std::vector* benchmarks, std::ostream* ErrStream) { - CHECK(ErrStream); + BM_CHECK(ErrStream); auto& Err = *ErrStream; // Make regular expression out of command-line flag std::string error_msg; @@ -211,10 +211,13 @@ Benchmark::Benchmark(const char* name) 
use_real_time_(false), use_manual_time_(false), complexity_(oNone), - complexity_lambda_(nullptr) { + complexity_lambda_(nullptr), + setup_(nullptr), + teardown_(nullptr) { ComputeStatistics("mean", StatisticsMean); ComputeStatistics("median", StatisticsMedian); ComputeStatistics("stddev", StatisticsStdDev); + ComputeStatistics("cv", StatisticsCV, kPercentage); } Benchmark::~Benchmark() {} @@ -225,7 +228,7 @@ Benchmark* Benchmark::Name(const std::string& name) { } Benchmark* Benchmark::Arg(int64_t x) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); args_.push_back({x}); return this; } @@ -236,7 +239,7 @@ Benchmark* Benchmark::Unit(TimeUnit unit) { } Benchmark* Benchmark::Range(int64_t start, int64_t limit) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); std::vector arglist; AddRange(&arglist, start, limit, range_multiplier_); @@ -248,7 +251,7 @@ Benchmark* Benchmark::Range(int64_t start, int64_t limit) { Benchmark* Benchmark::Ranges( const std::vector>& ranges) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(ranges.size())); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(ranges.size())); std::vector> arglists(ranges.size()); for (std::size_t i = 0; i < ranges.size(); i++) { AddRange(&arglists[i], ranges[i].first, ranges[i].second, @@ -262,7 +265,7 @@ Benchmark* Benchmark::Ranges( Benchmark* Benchmark::ArgsProduct( const std::vector>& arglists) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(arglists.size())); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(arglists.size())); std::vector indices(arglists.size()); const std::size_t total = std::accumulate( @@ -289,20 +292,20 @@ Benchmark* Benchmark::ArgsProduct( } Benchmark* Benchmark::ArgName(const std::string& name) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); arg_names_ = {name}; return this; } Benchmark* Benchmark::ArgNames(const std::vector& names) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(names.size())); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(names.size())); arg_names_ = names; return this; } Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); - CHECK_LE(start, limit); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); + BM_CHECK_LE(start, limit); for (int64_t arg = start; arg <= limit; arg += step) { args_.push_back({arg}); } @@ -310,7 +313,7 @@ Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) { } Benchmark* Benchmark::Args(const std::vector& args) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(args.size())); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(args.size())); args_.push_back(args); return this; } @@ -320,28 +323,40 @@ Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) { return this; } +Benchmark* Benchmark::Setup(void (*setup)(const benchmark::State&)) { + BM_CHECK(setup != nullptr); + setup_ = setup; + return this; +} + +Benchmark* Benchmark::Teardown(void (*teardown)(const benchmark::State&)) { + BM_CHECK(teardown != nullptr); + teardown_ = teardown; + return this; +} + Benchmark* Benchmark::RangeMultiplier(int multiplier) { - CHECK(multiplier > 1); + BM_CHECK(multiplier > 1); range_multiplier_ = multiplier; return this; } Benchmark* Benchmark::MinTime(double t) { - CHECK(t > 0.0); - CHECK(iterations_ == 0); + BM_CHECK(t > 0.0); + BM_CHECK(iterations_ == 0); min_time_ = t; return this; } Benchmark* 
Benchmark::Iterations(IterationCount n) { - CHECK(n > 0); - CHECK(IsZero(min_time_)); + BM_CHECK(n > 0); + BM_CHECK(IsZero(min_time_)); iterations_ = n; return this; } Benchmark* Benchmark::Repetitions(int n) { - CHECK(n > 0); + BM_CHECK(n > 0); repetitions_ = n; return this; } @@ -374,14 +389,14 @@ Benchmark* Benchmark::MeasureProcessCPUTime() { } Benchmark* Benchmark::UseRealTime() { - CHECK(!use_manual_time_) + BM_CHECK(!use_manual_time_) << "Cannot set UseRealTime and UseManualTime simultaneously."; use_real_time_ = true; return this; } Benchmark* Benchmark::UseManualTime() { - CHECK(!use_real_time_) + BM_CHECK(!use_real_time_) << "Cannot set UseRealTime and UseManualTime simultaneously."; use_manual_time_ = true; return this; @@ -398,21 +413,22 @@ Benchmark* Benchmark::Complexity(BigOFunc* complexity) { return this; } -Benchmark* Benchmark::ComputeStatistics(std::string name, - StatisticsFunc* statistics) { - statistics_.emplace_back(name, statistics); +Benchmark* Benchmark::ComputeStatistics(const std::string& name, + StatisticsFunc* statistics, + StatisticUnit unit) { + statistics_.emplace_back(name, statistics, unit); return this; } Benchmark* Benchmark::Threads(int t) { - CHECK_GT(t, 0); + BM_CHECK_GT(t, 0); thread_counts_.push_back(t); return this; } Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) { - CHECK_GT(min_threads, 0); - CHECK_GE(max_threads, min_threads); + BM_CHECK_GT(min_threads, 0); + BM_CHECK_GE(max_threads, min_threads); AddRange(&thread_counts_, min_threads, max_threads, 2); return this; @@ -420,9 +436,9 @@ Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) { Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads, int stride) { - CHECK_GT(min_threads, 0); - CHECK_GE(max_threads, min_threads); - CHECK_GE(stride, 1); + BM_CHECK_GT(min_threads, 0); + BM_CHECK_GE(max_threads, min_threads); + BM_CHECK_GE(stride, 1); for (auto i = min_threads; i < max_threads; i += stride) { thread_counts_.push_back(i); @@ -458,4 +474,19 @@ void ClearRegisteredBenchmarks() { internal::BenchmarkFamilies::GetInstance()->ClearBenchmarks(); } +std::vector CreateRange(int64_t lo, int64_t hi, int multi) { + std::vector args; + internal::AddRange(&args, lo, hi, multi); + return args; +} + +std::vector CreateDenseRange(int64_t start, int64_t limit, int step) { + BM_CHECK_LE(start, limit); + std::vector args; + for (int64_t arg = start; arg <= limit; arg += step) { + args.push_back(arg); + } + return args; +} + } // end namespace benchmark diff --git a/third-party/benchmark/src/benchmark_register.h b/third-party/benchmark/src/benchmark_register.h index 09496607f224..d3f4974e9074 100644 --- a/third-party/benchmark/src/benchmark_register.h +++ b/third-party/benchmark/src/benchmark_register.h @@ -12,11 +12,11 @@ namespace internal { // Append the powers of 'mult' in the closed interval [lo, hi]. // Returns iterator to the start of the inserted range. template -typename std::vector::iterator -AddPowers(std::vector* dst, T lo, T hi, int mult) { - CHECK_GE(lo, 0); - CHECK_GE(hi, lo); - CHECK_GE(mult, 2); +typename std::vector::iterator AddPowers(std::vector* dst, T lo, T hi, + int mult) { + BM_CHECK_GE(lo, 0); + BM_CHECK_GE(hi, lo); + BM_CHECK_GE(mult, 2); const size_t start_offset = dst->size(); @@ -38,10 +38,10 @@ AddPowers(std::vector* dst, T lo, T hi, int mult) { template void AddNegatedPowers(std::vector* dst, T lo, T hi, int mult) { // We negate lo and hi so we require that they cannot be equal to 'min'. 
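`ComputeStatistics()` above now takes the statistic name by const reference plus an optional `StatisticUnit`, which is how the new built-in `cv` statistic reports as a percentage. A sketch of a custom percentage-unit statistic, assuming the existing `StatisticsFunc` shape `double(const std::vector<double>&)` (not shown in this hunk); names are illustrative:

```cpp
#include <benchmark/benchmark.h>

#include <algorithm>
#include <vector>

// Relative spread of the per-repetition results: (max - min) / mean.
static double RelativeSpread(const std::vector<double>& v) {
  if (v.empty()) return 0.0;
  double sum = 0.0;
  for (double x : v) sum += x;
  const double mean = sum / static_cast<double>(v.size());
  const double mn = *std::min_element(v.begin(), v.end());
  const double mx = *std::max_element(v.begin(), v.end());
  return mean > 0.0 ? (mx - mn) / mean : 0.0;
}

static void BM_Spin(benchmark::State& state) {
  for (auto _ : state) {
    int x = 0;
    for (int i = 0; i < 1000; ++i) x += i;
    benchmark::DoNotOptimize(x);
  }
}

// kPercentage makes reporters print this aggregate as "NN.NN %", matching the
// new built-in "cv" statistic registered in the Benchmark constructor.
BENCHMARK(BM_Spin)
    ->Repetitions(10)
    ->ComputeStatistics("rel_spread", RelativeSpread, benchmark::kPercentage);
BENCHMARK_MAIN();
```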
- CHECK_GT(lo, std::numeric_limits::min()); - CHECK_GT(hi, std::numeric_limits::min()); - CHECK_GE(hi, lo); - CHECK_LE(hi, 0); + BM_CHECK_GT(lo, std::numeric_limits::min()); + BM_CHECK_GT(hi, std::numeric_limits::min()); + BM_CHECK_GE(hi, lo); + BM_CHECK_LE(hi, 0); // Add positive powers, then negate and reverse. // Casts necessary since small integers get promoted @@ -60,8 +60,8 @@ void AddRange(std::vector* dst, T lo, T hi, int mult) { static_assert(std::is_integral::value && std::is_signed::value, "Args type must be a signed integer"); - CHECK_GE(hi, lo); - CHECK_GE(mult, 2); + BM_CHECK_GE(hi, lo); + BM_CHECK_GE(mult, 2); // Add "lo" dst->push_back(lo); diff --git a/third-party/benchmark/src/benchmark_runner.cc b/third-party/benchmark/src/benchmark_runner.cc index 6742d42dbecd..eac807b066f1 100644 --- a/third-party/benchmark/src/benchmark_runner.cc +++ b/third-party/benchmark/src/benchmark_runner.cc @@ -67,7 +67,7 @@ BenchmarkReporter::Run CreateRunReport( const benchmark::internal::BenchmarkInstance& b, const internal::ThreadManager::Result& results, IterationCount memory_iterations, - const MemoryManager::Result& memory_result, double seconds, + const MemoryManager::Result* memory_result, double seconds, int64_t repetition_index, int64_t repeats) { // Create report about this benchmark run. BenchmarkReporter::Run report; @@ -99,12 +99,12 @@ BenchmarkReporter::Run CreateRunReport( report.counters = results.counters; if (memory_iterations > 0) { - report.has_memory_result = true; + assert(memory_result != nullptr); + report.memory_result = memory_result; report.allocs_per_iter = - memory_iterations ? static_cast(memory_result.num_allocs) / + memory_iterations ? static_cast(memory_result->num_allocs) / memory_iterations : 0; - report.max_bytes_used = memory_result.max_bytes_used; } internal::Finish(&report.counters, results.iterations, seconds, @@ -124,7 +124,7 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters, : internal::ThreadTimer::Create()); State st = b->Run(iters, thread_id, &timer, manager, perf_counters_measurement); - CHECK(st.error_occurred() || st.iterations() >= st.max_iterations) + BM_CHECK(st.error_occurred() || st.iterations() >= st.max_iterations) << "Benchmark returned before State::KeepRunning() returned false!"; { MutexLock l(manager->GetBenchmarkMutex()); @@ -168,14 +168,14 @@ BenchmarkRunner::BenchmarkRunner( internal::ARM_DisplayReportAggregatesOnly); run_results.file_report_aggregates_only = (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly); - CHECK(FLAGS_benchmark_perf_counters.empty() || - perf_counters_measurement.IsValid()) + BM_CHECK(FLAGS_benchmark_perf_counters.empty() || + perf_counters_measurement.IsValid()) << "Perf counters were requested but could not be set up."; } } BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() { - VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n"; + BM_VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n"; std::unique_ptr manager; manager.reset(new internal::ThreadManager(b.threads())); @@ -210,8 +210,8 @@ BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() { // If we were measuring whole-process CPU usage, adjust the CPU time too. 
if (b.measure_process_cpu_time()) i.results.cpu_time_used /= b.threads(); - VLOG(2) << "Ran in " << i.results.cpu_time_used << "/" - << i.results.real_time_used << "\n"; + BM_VLOG(2) << "Ran in " << i.results.cpu_time_used << "/" + << i.results.real_time_used << "\n"; // By using KeepRunningBatch a benchmark can iterate more times than // requested, so take the iteration count from i.results. @@ -239,8 +239,7 @@ IterationCount BenchmarkRunner::PredictNumItersNeeded( // NOTE: When the last run was at least 10% of the min time the max // expansion should be 14x. bool is_significant = (i.seconds / min_time) > 0.1; - multiplier = is_significant ? multiplier : std::min(10.0, multiplier); - if (multiplier <= 1.0) multiplier = 2.0; + multiplier = is_significant ? multiplier : 10.0; // So what seems to be the sufficiently-large iteration count? Round up. const IterationCount max_next_iters = static_cast( @@ -249,7 +248,7 @@ IterationCount BenchmarkRunner::PredictNumItersNeeded( // But we do have *some* sanity limits though.. const IterationCount next_iters = std::min(max_next_iters, kMaxIterations); - VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n"; + BM_VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n"; return next_iters; // round up before conversion to integer. } @@ -280,7 +279,9 @@ void BenchmarkRunner::DoOneRepetition() { // is *only* calculated for the *first* repetition, and other repetitions // simply use that precomputed iteration count. for (;;) { + b.Setup(); i = DoNIterations(); + b.Teardown(); // Do we consider the results to be significant? // If we are doing repetitions, and the first repetition was already done, @@ -303,24 +304,33 @@ void BenchmarkRunner::DoOneRepetition() { } // Oh, one last thing, we need to also produce the 'memory measurements'.. - MemoryManager::Result memory_result; + MemoryManager::Result* memory_result = nullptr; IterationCount memory_iterations = 0; if (memory_manager != nullptr) { + // TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an + // optional so we don't have to own the Result here. + // Can't do it now due to cxx03. + memory_results.push_back(MemoryManager::Result()); + memory_result = &memory_results.back(); // Only run a few iterations to reduce the impact of one-time // allocations in benchmarks that are not properly managed. memory_iterations = std::min(16, iters); memory_manager->Start(); std::unique_ptr manager; manager.reset(new internal::ThreadManager(1)); + b.Setup(); RunInThread(&b, memory_iterations, 0, manager.get(), perf_counters_measurement_ptr); manager->WaitForAllThreads(); manager.reset(); + b.Teardown(); - memory_manager->Stop(&memory_result); + BENCHMARK_DISABLE_DEPRECATED_WARNING + memory_manager->Stop(memory_result); + BENCHMARK_RESTORE_DEPRECATED_WARNING } - // Ok, now actualy report. + // Ok, now actually report. 
BenchmarkReporter::Run report = CreateRunReport(b, i.results, memory_iterations, memory_result, i.seconds, num_repetitions_done, repeats); diff --git a/third-party/benchmark/src/benchmark_runner.h b/third-party/benchmark/src/benchmark_runner.h index 8a855236b227..752eefdc26fa 100644 --- a/third-party/benchmark/src/benchmark_runner.h +++ b/third-party/benchmark/src/benchmark_runner.h @@ -23,18 +23,14 @@ #include "perf_counters.h" #include "thread_manager.h" -DECLARE_double(benchmark_min_time); - -DECLARE_int32(benchmark_repetitions); - -DECLARE_bool(benchmark_report_aggregates_only); - -DECLARE_bool(benchmark_display_aggregates_only); - -DECLARE_string(benchmark_perf_counters); - namespace benchmark { +BM_DECLARE_double(benchmark_min_time); +BM_DECLARE_int32(benchmark_repetitions); +BM_DECLARE_bool(benchmark_report_aggregates_only); +BM_DECLARE_bool(benchmark_display_aggregates_only); +BM_DECLARE_string(benchmark_perf_counters); + namespace internal { extern MemoryManager* memory_manager; @@ -64,7 +60,7 @@ class BenchmarkRunner { BenchmarkReporter::PerFamilyRunReports* GetReportsForFamily() const { return reports_for_family; - }; + } private: RunResults run_results; @@ -80,6 +76,8 @@ class BenchmarkRunner { std::vector pool; + std::vector memory_results; + IterationCount iters; // preserved between repetitions! // So only the first repetition has to find/calculate it, // the other repetitions will just use that precomputed iteration count. diff --git a/third-party/benchmark/src/check.h b/third-party/benchmark/src/check.h index f5f8253f8040..0efd13ff4db6 100644 --- a/third-party/benchmark/src/check.h +++ b/third-party/benchmark/src/check.h @@ -23,8 +23,9 @@ BENCHMARK_NORETURN inline void CallAbortHandler() { std::abort(); // fallback to enforce noreturn } -// CheckHandler is the class constructed by failing CHECK macros. CheckHandler -// will log information about the failures and abort when it is destructed. +// CheckHandler is the class constructed by failing BM_CHECK macros. +// CheckHandler will log information about the failures and abort when it is +// destructed. class CheckHandler { public: CheckHandler(const char* check, const char* file, const char* func, int line) @@ -51,32 +52,32 @@ class CheckHandler { } // end namespace internal } // end namespace benchmark -// The CHECK macro returns a std::ostream object that can have extra information -// written to it. +// The BM_CHECK macro returns a std::ostream object that can have extra +// information written to it. #ifndef NDEBUG -#define CHECK(b) \ +#define BM_CHECK(b) \ (b ? 
::benchmark::internal::GetNullLogInstance() \ : ::benchmark::internal::CheckHandler(#b, __FILE__, __func__, __LINE__) \ .GetLog()) #else -#define CHECK(b) ::benchmark::internal::GetNullLogInstance() +#define BM_CHECK(b) ::benchmark::internal::GetNullLogInstance() #endif // clang-format off // preserve whitespacing between operators for alignment -#define CHECK_EQ(a, b) CHECK((a) == (b)) -#define CHECK_NE(a, b) CHECK((a) != (b)) -#define CHECK_GE(a, b) CHECK((a) >= (b)) -#define CHECK_LE(a, b) CHECK((a) <= (b)) -#define CHECK_GT(a, b) CHECK((a) > (b)) -#define CHECK_LT(a, b) CHECK((a) < (b)) +#define BM_CHECK_EQ(a, b) BM_CHECK((a) == (b)) +#define BM_CHECK_NE(a, b) BM_CHECK((a) != (b)) +#define BM_CHECK_GE(a, b) BM_CHECK((a) >= (b)) +#define BM_CHECK_LE(a, b) BM_CHECK((a) <= (b)) +#define BM_CHECK_GT(a, b) BM_CHECK((a) > (b)) +#define BM_CHECK_LT(a, b) BM_CHECK((a) < (b)) -#define CHECK_FLOAT_EQ(a, b, eps) CHECK(std::fabs((a) - (b)) < (eps)) -#define CHECK_FLOAT_NE(a, b, eps) CHECK(std::fabs((a) - (b)) >= (eps)) -#define CHECK_FLOAT_GE(a, b, eps) CHECK((a) - (b) > -(eps)) -#define CHECK_FLOAT_LE(a, b, eps) CHECK((b) - (a) > -(eps)) -#define CHECK_FLOAT_GT(a, b, eps) CHECK((a) - (b) > (eps)) -#define CHECK_FLOAT_LT(a, b, eps) CHECK((b) - (a) > (eps)) +#define BM_CHECK_FLOAT_EQ(a, b, eps) BM_CHECK(std::fabs((a) - (b)) < (eps)) +#define BM_CHECK_FLOAT_NE(a, b, eps) BM_CHECK(std::fabs((a) - (b)) >= (eps)) +#define BM_CHECK_FLOAT_GE(a, b, eps) BM_CHECK((a) - (b) > -(eps)) +#define BM_CHECK_FLOAT_LE(a, b, eps) BM_CHECK((b) - (a) > -(eps)) +#define BM_CHECK_FLOAT_GT(a, b, eps) BM_CHECK((a) - (b) > (eps)) +#define BM_CHECK_FLOAT_LT(a, b, eps) BM_CHECK((b) - (a) > (eps)) //clang-format on #endif // CHECK_H_ diff --git a/third-party/benchmark/src/colorprint.cc b/third-party/benchmark/src/colorprint.cc index fff6a98818b8..1a000a063736 100644 --- a/third-party/benchmark/src/colorprint.cc +++ b/third-party/benchmark/src/colorprint.cc @@ -25,8 +25,8 @@ #include "internal_macros.h" #ifdef BENCHMARK_OS_WINDOWS -#include #include +#include #else #include #endif // BENCHMARK_OS_WINDOWS @@ -94,7 +94,7 @@ std::string FormatString(const char* msg, va_list args) { va_end(args_cp); // currently there is no error handling for failure, so this is hack. - CHECK(ret >= 0); + BM_CHECK(ret >= 0); if (ret == 0) // handle empty expansion return {}; @@ -102,10 +102,10 @@ std::string FormatString(const char* msg, va_list args) { return local_buff; else { // we did not provide a long enough buffer on our first attempt. 
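Since the renamed `BM_CHECK` macros are library-internal (src/check.h), here is a brief contributor-facing sketch of the streaming idiom they support; the helper function is hypothetical:

```cpp
// Library-internal sketch only. When NDEBUG is defined the condition is not
// evaluated and the message goes to the null log stream; otherwise a failing
// check streams its message through CheckHandler and aborts.
#include "check.h"

static void AddPositive(int* dst, int value) {
  BM_CHECK(dst != nullptr) << "destination must not be null";
  BM_CHECK_GT(value, 0) << "got " << value;
  *dst += value;
}
```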
- size = (size_t)ret + 1; // + 1 for the null byte + size = static_cast(ret) + 1; // + 1 for the null byte std::unique_ptr buff(new char[size]); ret = vsnprintf(buff.get(), size, msg, args); - CHECK(ret > 0 && ((size_t)ret) < size); + BM_CHECK(ret > 0 && (static_cast(ret)) < size); return buff.get(); } } diff --git a/third-party/benchmark/src/commandlineflags.cc b/third-party/benchmark/src/commandlineflags.cc index 5724aaa29402..9615e351ffae 100644 --- a/third-party/benchmark/src/commandlineflags.cc +++ b/third-party/benchmark/src/commandlineflags.cc @@ -248,9 +248,8 @@ bool ParseStringFlag(const char* str, const char* flag, std::string* value) { return true; } -bool ParseKeyValueFlag( - const char* str, const char* flag, - std::map* value) { +bool ParseKeyValueFlag(const char* str, const char* flag, + std::map* value) { const char* const value_str = ParseFlagValue(str, flag, false); if (value_str == nullptr) return false; diff --git a/third-party/benchmark/src/commandlineflags.h b/third-party/benchmark/src/commandlineflags.h index 0c988cccb3ae..5baaf11784df 100644 --- a/third-party/benchmark/src/commandlineflags.h +++ b/third-party/benchmark/src/commandlineflags.h @@ -9,23 +9,23 @@ #define FLAG(name) FLAGS_##name // Macros for declaring flags. -#define DECLARE_bool(name) extern bool FLAG(name) -#define DECLARE_int32(name) extern int32_t FLAG(name) -#define DECLARE_double(name) extern double FLAG(name) -#define DECLARE_string(name) extern std::string FLAG(name) -#define DECLARE_kvpairs(name) \ +#define BM_DECLARE_bool(name) extern bool FLAG(name) +#define BM_DECLARE_int32(name) extern int32_t FLAG(name) +#define BM_DECLARE_double(name) extern double FLAG(name) +#define BM_DECLARE_string(name) extern std::string FLAG(name) +#define BM_DECLARE_kvpairs(name) \ extern std::map FLAG(name) // Macros for defining flags. -#define DEFINE_bool(name, default_val) \ +#define BM_DEFINE_bool(name, default_val) \ bool FLAG(name) = benchmark::BoolFromEnv(#name, default_val) -#define DEFINE_int32(name, default_val) \ +#define BM_DEFINE_int32(name, default_val) \ int32_t FLAG(name) = benchmark::Int32FromEnv(#name, default_val) -#define DEFINE_double(name, default_val) \ +#define BM_DEFINE_double(name, default_val) \ double FLAG(name) = benchmark::DoubleFromEnv(#name, default_val) -#define DEFINE_string(name, default_val) \ +#define BM_DEFINE_string(name, default_val) \ std::string FLAG(name) = benchmark::StringFromEnv(#name, default_val) -#define DEFINE_kvpairs(name, default_val) \ +#define BM_DEFINE_kvpairs(name, default_val) \ std::map FLAG(name) = \ benchmark::KvPairsFromEnv(#name, default_val) diff --git a/third-party/benchmark/src/complexity.cc b/third-party/benchmark/src/complexity.cc index 29f7c3b03155..825c57394a8c 100644 --- a/third-party/benchmark/src/complexity.cc +++ b/third-party/benchmark/src/complexity.cc @@ -15,12 +15,13 @@ // Source project : https://github.com/ismaelJimenez/cpp.leastsq // Adapted to be used with google benchmark -#include "benchmark/benchmark.h" +#include "complexity.h" #include #include + +#include "benchmark/benchmark.h" #include "check.h" -#include "complexity.h" namespace benchmark { @@ -123,10 +124,10 @@ LeastSq MinimalLeastSq(const std::vector& n, // fitting curve. 
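The flag helpers likewise gain a `BM_` prefix but keep their shape: `BM_DEFINE_*` creates a `FLAGS_<name>` global seeded through the corresponding `*FromEnv()` call, and `BM_DECLARE_*` declares it for other translation units. A library-internal sketch with a hypothetical flag name:

```cpp
// In one source file of the library (flags now live inside namespace
// benchmark, as in benchmark.cc):
#include "commandlineflags.h"

namespace benchmark {
BM_DEFINE_bool(my_feature, false);  // defines FLAGS_my_feature
}  // namespace benchmark

// In any other file that needs it, only a declaration is required:
// namespace benchmark {
// BM_DECLARE_bool(my_feature);     // extern declaration of FLAGS_my_feature
// }
```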
LeastSq MinimalLeastSq(const std::vector& n, const std::vector& time, const BigO complexity) { - CHECK_EQ(n.size(), time.size()); - CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two - // benchmark runs are given - CHECK_NE(complexity, oNone); + BM_CHECK_EQ(n.size(), time.size()); + BM_CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two + // benchmark runs are given + BM_CHECK_NE(complexity, oNone); LeastSq best_fit; @@ -167,7 +168,8 @@ std::vector ComputeBigO( // Populate the accumulators. for (const Run& run : reports) { - CHECK_GT(run.complexity_n, 0) << "Did you forget to call SetComplexityN?"; + BM_CHECK_GT(run.complexity_n, 0) + << "Did you forget to call SetComplexityN?"; n.push_back(run.complexity_n); real_time.push_back(run.real_accumulated_time / run.iterations); cpu_time.push_back(run.cpu_accumulated_time / run.iterations); @@ -198,6 +200,7 @@ std::vector ComputeBigO( big_o.repetition_index = Run::no_repetition_index; big_o.threads = reports[0].threads; big_o.aggregate_name = "BigO"; + big_o.aggregate_unit = StatisticUnit::kTime; big_o.report_label = reports[0].report_label; big_o.iterations = 0; big_o.real_accumulated_time = result_real.coef; @@ -219,6 +222,7 @@ std::vector ComputeBigO( rms.per_family_instance_index = reports[0].per_family_instance_index; rms.run_type = BenchmarkReporter::Run::RT_Aggregate; rms.aggregate_name = "RMS"; + rms.aggregate_unit = StatisticUnit::kPercentage; rms.report_label = big_o.report_label; rms.iterations = 0; rms.repetition_index = Run::no_repetition_index; diff --git a/third-party/benchmark/src/console_reporter.cc b/third-party/benchmark/src/console_reporter.cc index 6fd764525e81..04cc0b74e58e 100644 --- a/third-party/benchmark/src/console_reporter.cc +++ b/third-party/benchmark/src/console_reporter.cc @@ -45,7 +45,7 @@ bool ConsoleReporter::ReportContext(const Context& context) { GetErrorStream() << "Color printing is only supported for stdout on windows." " Disabling color printing\n"; - output_options_ = static_cast< OutputOptions >(output_options_ & ~OO_Color); + output_options_ = static_cast(output_options_ & ~OO_Color); } #endif @@ -53,11 +53,12 @@ bool ConsoleReporter::ReportContext(const Context& context) { } void ConsoleReporter::PrintHeader(const Run& run) { - std::string str = FormatString("%-*s %13s %15s %12s", static_cast(name_field_width_), - "Benchmark", "Time", "CPU", "Iterations"); - if(!run.counters.empty()) { - if(output_options_ & OO_Tabular) { - for(auto const& c : run.counters) { + std::string str = + FormatString("%-*s %13s %15s %12s", static_cast(name_field_width_), + "Benchmark", "Time", "CPU", "Iterations"); + if (!run.counters.empty()) { + if (output_options_ & OO_Tabular) { + for (auto const& c : run.counters) { str += FormatString(" %10s", c.first.c_str()); } } else { @@ -97,7 +98,6 @@ static void IgnoreColorPrint(std::ostream& out, LogColor, const char* fmt, va_end(args); } - static std::string FormatTime(double time) { // Align decimal places... if (time < 1.0) { @@ -115,8 +115,9 @@ static std::string FormatTime(double time) { void ConsoleReporter::PrintRunData(const Run& result) { typedef void(PrinterFn)(std::ostream&, LogColor, const char*, ...); auto& Out = GetOutputStream(); - PrinterFn* printer = (output_options_ & OO_Color) ? - (PrinterFn*)ColorPrintf : IgnoreColorPrint; + PrinterFn* printer = (output_options_ & OO_Color) + ? static_cast(ColorPrintf) + : IgnoreColorPrint; auto name_color = (result.report_big_o || result.report_rms) ? 
COLOR_BLUE : COLOR_GREEN; printer(Out, name_color, "%-*s ", name_field_width_, @@ -134,18 +135,23 @@ void ConsoleReporter::PrintRunData(const Run& result) { const std::string real_time_str = FormatTime(real_time); const std::string cpu_time_str = FormatTime(cpu_time); - if (result.report_big_o) { std::string big_o = GetBigOString(result.complexity); - printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ", real_time, big_o.c_str(), - cpu_time, big_o.c_str()); + printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ", real_time, + big_o.c_str(), cpu_time, big_o.c_str()); } else if (result.report_rms) { printer(Out, COLOR_YELLOW, "%10.0f %-4s %10.0f %-4s ", real_time * 100, "%", cpu_time * 100, "%"); - } else { + } else if (result.run_type != Run::RT_Aggregate || + result.aggregate_unit == StatisticUnit::kTime) { const char* timeLabel = GetTimeUnitString(result.time_unit); - printer(Out, COLOR_YELLOW, "%s %-4s %s %-4s ", real_time_str.c_str(), timeLabel, - cpu_time_str.c_str(), timeLabel); + printer(Out, COLOR_YELLOW, "%s %-4s %s %-4s ", real_time_str.c_str(), + timeLabel, cpu_time_str.c_str(), timeLabel); + } else { + assert(result.aggregate_unit == StatisticUnit::kPercentage); + printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ", + (100. * result.real_accumulated_time), "%", + (100. * result.cpu_accumulated_time), "%"); } if (!result.report_big_o && !result.report_rms) { @@ -153,12 +159,19 @@ void ConsoleReporter::PrintRunData(const Run& result) { } for (auto& c : result.counters) { - const std::size_t cNameLen = std::max(std::string::size_type(10), - c.first.length()); - auto const& s = HumanReadableNumber(c.second.value, c.second.oneK); + const std::size_t cNameLen = + std::max(std::string::size_type(10), c.first.length()); + std::string s; const char* unit = ""; - if (c.second.flags & Counter::kIsRate) - unit = (c.second.flags & Counter::kInvert) ? "s" : "/s"; + if (result.run_type == Run::RT_Aggregate && + result.aggregate_unit == StatisticUnit::kPercentage) { + s = StrFormat("%.2f", 100. * c.second.value); + unit = "%"; + } else { + s = HumanReadableNumber(c.second.value, c.second.oneK); + if (c.second.flags & Counter::kIsRate) + unit = (c.second.flags & Counter::kInvert) ? "s" : "/s"; + } if (output_options_ & OO_Tabular) { printer(Out, COLOR_DEFAULT, " %*s%s", cNameLen - strlen(unit), s.c_str(), unit); diff --git a/third-party/benchmark/src/csv_reporter.cc b/third-party/benchmark/src/csv_reporter.cc index af2c18fc8a6e..1c5e9fa6689c 100644 --- a/third-party/benchmark/src/csv_reporter.cc +++ b/third-party/benchmark/src/csv_reporter.cc @@ -12,9 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
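The reformatted CsvEscape above implements the usual CSV quoting rule: wrap the whole field in double quotes and double any embedded quote. A small usage illustration using the same function body:

  #include <iostream>
  #include <string>

  std::string CsvEscape(const std::string& s) {
    std::string tmp;
    tmp.reserve(s.size() + 2);
    for (char c : s) {
      switch (c) {
        case '"':
          tmp += "\"\"";
          break;
        default:
          tmp += c;
          break;
      }
    }
    return '"' + tmp + '"';
  }

  int main() {
    // Prints: "label with ""quotes"" and, commas"
    std::cout << CsvEscape("label with \"quotes\" and, commas") << "\n";
    return 0;
  }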
-#include "benchmark/benchmark.h" -#include "complexity.h" - #include #include #include @@ -22,7 +19,9 @@ #include #include +#include "benchmark/benchmark.h" #include "check.h" +#include "complexity.h" #include "string_util.h" #include "timers.h" @@ -37,13 +36,17 @@ std::vector elements = { "error_occurred", "error_message"}; } // namespace -std::string CsvEscape(const std::string & s) { +std::string CsvEscape(const std::string& s) { std::string tmp; tmp.reserve(s.size() + 2); for (char c : s) { switch (c) { - case '"' : tmp += "\"\""; break; - default : tmp += c; break; + case '"': + tmp += "\"\""; + break; + default: + tmp += c; + break; } } return '"' + tmp + '"'; @@ -85,7 +88,8 @@ void CSVReporter::ReportRuns(const std::vector& reports) { for (const auto& cnt : run.counters) { if (cnt.first == "bytes_per_second" || cnt.first == "items_per_second") continue; - CHECK(user_counter_names_.find(cnt.first) != user_counter_names_.end()) + BM_CHECK(user_counter_names_.find(cnt.first) != + user_counter_names_.end()) << "All counters must be present in each run. " << "Counter named \"" << cnt.first << "\" was not in a run after being added to the header"; diff --git a/third-party/benchmark/src/cycleclock.h b/third-party/benchmark/src/cycleclock.h index f22ca9f7d299..d65d32a39d3d 100644 --- a/third-party/benchmark/src/cycleclock.h +++ b/third-party/benchmark/src/cycleclock.h @@ -115,7 +115,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { // the code is being compiled with a non-ancient compiler. _asm rdtsc #elif defined(COMPILER_MSVC) && defined(_M_ARM64) - // See https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=vs-2019 + // See // https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics // and https://reviews.llvm.org/D53115 int64_t virtual_timer_value; virtual_timer_value = _ReadStatusReg(ARM64_CNTVCT); @@ -187,7 +187,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { asm("stck %0" : "=Q"(tsc) : : "cc"); #endif return tsc; -#elif defined(__riscv) // RISC-V +#elif defined(__riscv) // RISC-V // Use RDCYCLE (and RDCYCLEH on riscv32) #if __riscv_xlen == 32 uint32_t cycles_lo, cycles_hi0, cycles_hi1; diff --git a/third-party/benchmark/src/json_reporter.cc b/third-party/benchmark/src/json_reporter.cc index 26898456f854..e84a4ed24f9d 100644 --- a/third-party/benchmark/src/json_reporter.cc +++ b/third-party/benchmark/src/json_reporter.cc @@ -12,9 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "benchmark/benchmark.h" -#include "complexity.h" - #include #include #include @@ -25,6 +22,8 @@ #include #include +#include "benchmark/benchmark.h" +#include "complexity.h" #include "string_util.h" #include "timers.h" @@ -35,34 +34,53 @@ extern std::map* global_context; namespace { -std::string StrEscape(const std::string & s) { +std::string StrEscape(const std::string& s) { std::string tmp; tmp.reserve(s.size()); for (char c : s) { switch (c) { - case '\b': tmp += "\\b"; break; - case '\f': tmp += "\\f"; break; - case '\n': tmp += "\\n"; break; - case '\r': tmp += "\\r"; break; - case '\t': tmp += "\\t"; break; - case '\\': tmp += "\\\\"; break; - case '"' : tmp += "\\\""; break; - default : tmp += c; break; + case '\b': + tmp += "\\b"; + break; + case '\f': + tmp += "\\f"; + break; + case '\n': + tmp += "\\n"; + break; + case '\r': + tmp += "\\r"; + break; + case '\t': + tmp += "\\t"; + break; + case '\\': + tmp += "\\\\"; + break; + case '"': + tmp += "\\\""; + break; + default: + tmp += c; + break; } } return tmp; } std::string FormatKV(std::string const& key, std::string const& value) { - return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), StrEscape(value).c_str()); + return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), + StrEscape(value).c_str()); } std::string FormatKV(std::string const& key, const char* value) { - return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), StrEscape(value).c_str()); + return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), + StrEscape(value).c_str()); } std::string FormatKV(std::string const& key, bool value) { - return StrFormat("\"%s\": %s", StrEscape(key).c_str(), value ? "true" : "false"); + return StrFormat("\"%s\": %s", StrEscape(key).c_str(), + value ? "true" : "false"); } std::string FormatKV(std::string const& key, int64_t value) { @@ -126,7 +144,9 @@ bool JSONReporter::ReportContext(const Context& context) { RoundDouble(info.cycles_per_second / 1000000.0)) << ",\n"; if (CPUInfo::Scaling::UNKNOWN != info.scaling) { - out << indent << FormatKV("cpu_scaling_enabled", info.scaling == CPUInfo::Scaling::ENABLED ? true : false) + out << indent + << FormatKV("cpu_scaling_enabled", + info.scaling == CPUInfo::Scaling::ENABLED ? true : false) << ",\n"; } @@ -139,8 +159,8 @@ bool JSONReporter::ReportContext(const Context& context) { out << cache_indent << FormatKV("type", CI.type) << ",\n"; out << cache_indent << FormatKV("level", static_cast(CI.level)) << ",\n"; - out << cache_indent - << FormatKV("size", static_cast(CI.size)) << ",\n"; + out << cache_indent << FormatKV("size", static_cast(CI.size)) + << ",\n"; out << cache_indent << FormatKV("num_sharing", static_cast(CI.num_sharing)) << "\n"; @@ -162,13 +182,15 @@ bool JSONReporter::ReportContext(const Context& context) { #else const char build_type[] = "debug"; #endif - out << indent << FormatKV("library_build_type", build_type) << "\n"; + out << indent << FormatKV("library_build_type", build_type); if (internal::global_context != nullptr) { - for (const auto& kv: *internal::global_context) { - out << indent << FormatKV(kv.first, kv.second) << "\n"; + for (const auto& kv : *internal::global_context) { + out << ",\n"; + out << indent << FormatKV(kv.first, kv.second); } } + out << "\n"; // Close context block and open the list of benchmarks. 
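The context-block change in json_reporter.cc above fixes how optional global_context entries are emitted: previously each entry was simply followed by "\n" with no separating comma; now a ",\n" is printed before each optional entry and a single newline closes the block, so the JSON stays valid and never ends with a trailing comma. A minimal sketch of that emission pattern (the keys and values below are made-up illustration data):

  #include <iostream>
  #include <map>
  #include <string>

  int main() {
    const std::map<std::string, std::string> extra = {
        {"compiler", "clang"}, {"revision", "abc123"}};
    std::cout << "{\n  \"library_build_type\": \"release\"";
    for (const auto& kv : extra) {
      std::cout << ",\n  \"" << kv.first << "\": \"" << kv.second << "\"";
    }
    std::cout << "\n}\n";
    return 0;
  }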
out << inner_indent << "},\n"; @@ -229,6 +251,15 @@ void JSONReporter::PrintRunData(Run const& run) { out << indent << FormatKV("threads", run.threads) << ",\n"; if (run.run_type == BenchmarkReporter::Run::RT_Aggregate) { out << indent << FormatKV("aggregate_name", run.aggregate_name) << ",\n"; + out << indent << FormatKV("aggregate_unit", [&run]() -> const char* { + switch (run.aggregate_unit) { + case StatisticUnit::kTime: + return "time"; + case StatisticUnit::kPercentage: + return "percentage"; + } + BENCHMARK_UNREACHABLE(); + }()) << ",\n"; } if (run.error_occurred) { out << indent << FormatKV("error_occurred", run.error_occurred) << ",\n"; @@ -236,8 +267,17 @@ void JSONReporter::PrintRunData(Run const& run) { } if (!run.report_big_o && !run.report_rms) { out << indent << FormatKV("iterations", run.iterations) << ",\n"; - out << indent << FormatKV("real_time", run.GetAdjustedRealTime()) << ",\n"; - out << indent << FormatKV("cpu_time", run.GetAdjustedCPUTime()); + if (run.run_type != Run::RT_Aggregate || + run.aggregate_unit == StatisticUnit::kTime) { + out << indent << FormatKV("real_time", run.GetAdjustedRealTime()) + << ",\n"; + out << indent << FormatKV("cpu_time", run.GetAdjustedCPUTime()); + } else { + assert(run.aggregate_unit == StatisticUnit::kPercentage); + out << indent << FormatKV("real_time", run.real_accumulated_time) + << ",\n"; + out << indent << FormatKV("cpu_time", run.cpu_accumulated_time); + } out << ",\n" << indent << FormatKV("time_unit", GetTimeUnitString(run.time_unit)); } else if (run.report_big_o) { @@ -255,9 +295,20 @@ void JSONReporter::PrintRunData(Run const& run) { out << ",\n" << indent << FormatKV(c.first, c.second); } - if (run.has_memory_result) { + if (run.memory_result) { + const MemoryManager::Result memory_result = *run.memory_result; out << ",\n" << indent << FormatKV("allocs_per_iter", run.allocs_per_iter); - out << ",\n" << indent << FormatKV("max_bytes_used", run.max_bytes_used); + out << ",\n" + << indent << FormatKV("max_bytes_used", memory_result.max_bytes_used); + + auto report_if_present = [&out, &indent](const char* label, int64_t val) { + if (val != MemoryManager::TombstoneValue) + out << ",\n" << indent << FormatKV(label, val); + }; + + report_if_present("total_allocated_bytes", + memory_result.total_allocated_bytes); + report_if_present("net_heap_growth", memory_result.net_heap_growth); } if (!run.report_label.empty()) { @@ -266,4 +317,7 @@ void JSONReporter::PrintRunData(Run const& run) { out << '\n'; } +const int64_t MemoryManager::TombstoneValue = + std::numeric_limits::max(); + } // end namespace benchmark diff --git a/third-party/benchmark/src/log.h b/third-party/benchmark/src/log.h index 47d0c35c0182..48c071aded8f 100644 --- a/third-party/benchmark/src/log.h +++ b/third-party/benchmark/src/log.h @@ -67,7 +67,7 @@ inline LogType& GetLogInstanceForLevel(int level) { } // end namespace benchmark // clang-format off -#define VLOG(x) \ +#define BM_VLOG(x) \ (::benchmark::internal::GetLogInstanceForLevel(x) << "-- LOG(" << x << "):" \ " ") // clang-format on diff --git a/third-party/benchmark/src/mutex.h b/third-party/benchmark/src/mutex.h index 9cc414ec467e..bec78d9e5f91 100644 --- a/third-party/benchmark/src/mutex.h +++ b/third-party/benchmark/src/mutex.h @@ -130,7 +130,7 @@ class Barrier { // entered the barrier. Returns iff this is the last thread to // enter the barrier. 
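The memory-reporting hunk above introduces MemoryManager::TombstoneValue (int64_t max, defined at the bottom of the json_reporter change) so the reporter can tell "metric not collected" apart from a real zero and only emit the optional fields a hook actually filled in. A sketch of that convention; the struct below is a stand-in carrying just the field names from the diff, not the library's full MemoryManager interface:

  #include <cstdint>
  #include <iostream>
  #include <limits>

  constexpr std::int64_t kTombstone = std::numeric_limits<std::int64_t>::max();

  struct MemResultSketch {
    std::int64_t max_bytes_used = kTombstone;
    std::int64_t total_allocated_bytes = kTombstone;
    std::int64_t net_heap_growth = kTombstone;
  };

  void ReportIfPresent(const char* label, std::int64_t val) {
    if (val != kTombstone) std::cout << label << ": " << val << "\n";
  }

  int main() {
    MemResultSketch r;
    r.max_bytes_used = 4096;  // the only metric this hypothetical hook filled in
    ReportIfPresent("max_bytes_used", r.max_bytes_used);
    ReportIfPresent("total_allocated_bytes", r.total_allocated_bytes);
    ReportIfPresent("net_heap_growth", r.net_heap_growth);
    return 0;
  }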
bool createBarrier(MutexLock& ml) REQUIRES(lock_) { - CHECK_LT(entered_, running_threads_); + BM_CHECK_LT(entered_, running_threads_); entered_++; if (entered_ < running_threads_) { // Wait for all threads to enter diff --git a/third-party/benchmark/src/perf_counters.cc b/third-party/benchmark/src/perf_counters.cc index 4ddf0de2502c..b2ac7687efef 100644 --- a/third-party/benchmark/src/perf_counters.cc +++ b/third-party/benchmark/src/perf_counters.cc @@ -49,7 +49,7 @@ PerfCounters PerfCounters::Create( const int mode = PFM_PLM3; // user mode only for (size_t i = 0; i < counter_names.size(); ++i) { const bool is_first = i == 0; - struct perf_event_attr attr{}; + struct perf_event_attr attr {}; attr.size = sizeof(attr); const int group_id = !is_first ? counter_ids[0] : -1; const auto& name = counter_names[i]; diff --git a/third-party/benchmark/src/perf_counters.h b/third-party/benchmark/src/perf_counters.h index b6629b99070b..47ca1385e24d 100644 --- a/third-party/benchmark/src/perf_counters.h +++ b/third-party/benchmark/src/perf_counters.h @@ -42,7 +42,7 @@ namespace internal { class PerfCounterValues { public: explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) { - CHECK_LE(nr_counters_, kMaxCounters); + BM_CHECK_LE(nr_counters_, kMaxCounters); } uint64_t operator[](size_t pos) const { return values_[kPadding + pos]; } diff --git a/third-party/benchmark/src/re.h b/third-party/benchmark/src/re.h index fbe25037b463..630046782dc4 100644 --- a/third-party/benchmark/src/re.h +++ b/third-party/benchmark/src/re.h @@ -126,7 +126,7 @@ inline bool Regex::Init(const std::string& spec, std::string* error) { // regerror returns the number of bytes necessary to null terminate // the string, so we move that when assigning to error. - CHECK_NE(needed, 0); + BM_CHECK_NE(needed, 0); error->assign(errbuf, needed - 1); delete[] errbuf; diff --git a/third-party/benchmark/src/reporter.cc b/third-party/benchmark/src/reporter.cc index 14dd40dc72f4..1d2df17b90f0 100644 --- a/third-party/benchmark/src/reporter.cc +++ b/third-party/benchmark/src/reporter.cc @@ -12,23 +12,21 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "benchmark/benchmark.h" -#include "timers.h" - #include - #include #include #include #include #include +#include "benchmark/benchmark.h" #include "check.h" #include "string_util.h" +#include "timers.h" namespace benchmark { namespace internal { -extern std::map* global_context; +extern std::map *global_context; } BenchmarkReporter::BenchmarkReporter() @@ -38,7 +36,7 @@ BenchmarkReporter::~BenchmarkReporter() {} void BenchmarkReporter::PrintBasicContext(std::ostream *out, Context const &context) { - CHECK(out) << "cannot be null"; + BM_CHECK(out) << "cannot be null"; auto &Out = *out; Out << LocalDateTimeString() << "\n"; @@ -70,7 +68,7 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out, } if (internal::global_context != nullptr) { - for (const auto& kv: *internal::global_context) { + for (const auto &kv : *internal::global_context) { Out << kv.first << ": " << kv.second << "\n"; } } diff --git a/third-party/benchmark/src/sleep.cc b/third-party/benchmark/src/sleep.cc index 4609d540eade..ab59000f24ad 100644 --- a/third-party/benchmark/src/sleep.cc +++ b/third-party/benchmark/src/sleep.cc @@ -35,7 +35,7 @@ void SleepForMilliseconds(int milliseconds) { Sleep(milliseconds); } void SleepForSeconds(double seconds) { SleepForMilliseconds(static_cast(kNumMillisPerSecond * seconds)); } -#else // BENCHMARK_OS_WINDOWS +#else // BENCHMARK_OS_WINDOWS void SleepForMicroseconds(int microseconds) { #ifdef BENCHMARK_OS_ZOS // z/OS does not support nanosleep. Instead call sleep() and then usleep() to @@ -43,8 +43,7 @@ void SleepForMicroseconds(int microseconds) { // argument is greater than 1000000. div_t sleepTime = div(microseconds, kNumMicrosPerSecond); int seconds = sleepTime.quot; - while (seconds != 0) - seconds = sleep(seconds); + while (seconds != 0) seconds = sleep(seconds); while (usleep(sleepTime.rem) == -1 && errno == EINTR) ; #else diff --git a/third-party/benchmark/src/statistics.cc b/third-party/benchmark/src/statistics.cc index 57472b9ff99b..3e5ef0993971 100644 --- a/third-party/benchmark/src/statistics.cc +++ b/third-party/benchmark/src/statistics.cc @@ -13,15 +13,16 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "benchmark/benchmark.h" +#include "statistics.h" #include #include #include #include #include + +#include "benchmark/benchmark.h" #include "check.h" -#include "statistics.h" namespace benchmark { @@ -74,6 +75,15 @@ double StatisticsStdDev(const std::vector& v) { return Sqrt(v.size() / (v.size() - 1.0) * (avg_squares - Sqr(mean))); } +double StatisticsCV(const std::vector& v) { + if (v.size() < 2) return 0.0; + + const auto stddev = StatisticsStdDev(v); + const auto mean = StatisticsMean(v); + + return stddev / mean; +} + std::vector ComputeStats( const std::vector& reports) { typedef BenchmarkReporter::Run Run; @@ -112,22 +122,22 @@ std::vector ComputeStats( it = counter_stats.find(cnt.first); it->second.s.reserve(reports.size()); } else { - CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags); + BM_CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags); } } } // Populate the accumulators. 
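The new StatisticsCV above is the coefficient of variation: sample standard deviation divided by the mean. Downstream it is treated as a percentage-unit aggregate (the console and JSON reporter changes earlier in this patch print kPercentage aggregates as percentages), which is why the test expectations later gain a "_cv" row. The same arithmetic as a standalone sketch:

  #include <cmath>
  #include <cstdio>
  #include <vector>

  double Mean(const std::vector<double>& v) {
    double sum = 0.0;
    for (double x : v) sum += x;
    return sum / v.size();
  }

  double StdDev(const std::vector<double>& v) {
    if (v.size() < 2) return 0.0;
    const double mean = Mean(v);
    double avg_squares = 0.0;
    for (double x : v) avg_squares += x * x;
    avg_squares /= v.size();
    // Sample standard deviation, matching the n/(n-1) correction used above.
    return std::sqrt(v.size() / (v.size() - 1.0) * (avg_squares - mean * mean));
  }

  double CV(const std::vector<double>& v) {
    return v.size() < 2 ? 0.0 : StdDev(v) / Mean(v);
  }

  int main() {
    const std::vector<double> times = {10.0, 11.0, 9.5, 10.5};
    std::printf("cv = %.2f%%\n", 100.0 * CV(times));
    return 0;
  }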
for (Run const& run : reports) { - CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name()); - CHECK_EQ(run_iterations, run.iterations); + BM_CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name()); + BM_CHECK_EQ(run_iterations, run.iterations); if (run.error_occurred) continue; real_accumulated_time_stat.emplace_back(run.real_accumulated_time); cpu_accumulated_time_stat.emplace_back(run.cpu_accumulated_time); // user counters for (auto const& cnt : run.counters) { auto it = counter_stats.find(cnt.first); - CHECK_NE(it, counter_stats.end()); + BM_CHECK_NE(it, counter_stats.end()); it->second.s.emplace_back(cnt.second); } } @@ -155,6 +165,7 @@ std::vector ComputeStats( data.repetitions = reports[0].repetitions; data.repetition_index = Run::no_repetition_index; data.aggregate_name = Stat.name_; + data.aggregate_unit = Stat.unit_; data.report_label = report_label; // It is incorrect to say that an aggregate is computed over @@ -167,13 +178,15 @@ std::vector ComputeStats( data.real_accumulated_time = Stat.compute_(real_accumulated_time_stat); data.cpu_accumulated_time = Stat.compute_(cpu_accumulated_time_stat); - // We will divide these times by data.iterations when reporting, but the - // data.iterations is not nessesairly the scale of these measurements, - // because in each repetition, these timers are sum over all the iterations. - // And if we want to say that the stats are over N repetitions and not - // M iterations, we need to multiply these by (N/M). - data.real_accumulated_time *= iteration_rescale_factor; - data.cpu_accumulated_time *= iteration_rescale_factor; + if (data.aggregate_unit == StatisticUnit::kTime) { + // We will divide these times by data.iterations when reporting, but the + // data.iterations is not necessarily the scale of these measurements, + // because in each repetition, these timers are sum over all the iters. + // And if we want to say that the stats are over N repetitions and not + // M iterations, we need to multiply these by (N/M). 
+ data.real_accumulated_time *= iteration_rescale_factor; + data.cpu_accumulated_time *= iteration_rescale_factor; + } data.time_unit = reports[0].time_unit; diff --git a/third-party/benchmark/src/statistics.h b/third-party/benchmark/src/statistics.h index 7eccc85536a5..a9545a58c648 100644 --- a/third-party/benchmark/src/statistics.h +++ b/third-party/benchmark/src/statistics.h @@ -31,6 +31,7 @@ std::vector ComputeStats( double StatisticsMean(const std::vector& v); double StatisticsMedian(const std::vector& v); double StatisticsStdDev(const std::vector& v); +double StatisticsCV(const std::vector& v); } // end namespace benchmark diff --git a/third-party/benchmark/src/string_util.cc b/third-party/benchmark/src/string_util.cc index 3551418174fd..401fa13df7af 100644 --- a/third-party/benchmark/src/string_util.cc +++ b/third-party/benchmark/src/string_util.cc @@ -151,7 +151,7 @@ std::string StrFormatImp(const char* msg, va_list args) { auto buff_ptr = std::unique_ptr(new char[size]); // 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation // in the android-ndk - ret = vsnprintf(buff_ptr.get(), size, msg, args); + vsnprintf(buff_ptr.get(), size, msg, args); return std::string(buff_ptr.get()); } @@ -198,11 +198,10 @@ unsigned long stoul(const std::string& str, size_t* pos, int base) { /* Check for errors and return */ if (strtoulErrno == ERANGE) { - throw std::out_of_range( - "stoul failed: " + str + " is outside of range of unsigned long"); + throw std::out_of_range("stoul failed: " + str + + " is outside of range of unsigned long"); } else if (strEnd == strStart || strtoulErrno != 0) { - throw std::invalid_argument( - "stoul failed: " + str + " is not an integer"); + throw std::invalid_argument("stoul failed: " + str + " is not an integer"); } if (pos != nullptr) { *pos = static_cast(strEnd - strStart); @@ -225,11 +224,10 @@ int stoi(const std::string& str, size_t* pos, int base) { /* Check for errors and return */ if (strtolErrno == ERANGE || long(int(result)) != result) { - throw std::out_of_range( - "stoul failed: " + str + " is outside of range of int"); + throw std::out_of_range("stoul failed: " + str + + " is outside of range of int"); } else if (strEnd == strStart || strtolErrno != 0) { - throw std::invalid_argument( - "stoul failed: " + str + " is not an integer"); + throw std::invalid_argument("stoul failed: " + str + " is not an integer"); } if (pos != nullptr) { *pos = static_cast(strEnd - strStart); @@ -252,11 +250,10 @@ double stod(const std::string& str, size_t* pos) { /* Check for errors and return */ if (strtodErrno == ERANGE) { - throw std::out_of_range( - "stoul failed: " + str + " is outside of range of int"); + throw std::out_of_range("stoul failed: " + str + + " is outside of range of int"); } else if (strEnd == strStart || strtodErrno != 0) { - throw std::invalid_argument( - "stoul failed: " + str + " is not an integer"); + throw std::invalid_argument("stoul failed: " + str + " is not an integer"); } if (pos != nullptr) { *pos = static_cast(strEnd - strStart); diff --git a/third-party/benchmark/src/string_util.h b/third-party/benchmark/src/string_util.h index 6bc28b6912a8..ff3b7da47d6c 100644 --- a/third-party/benchmark/src/string_util.h +++ b/third-party/benchmark/src/string_util.h @@ -4,6 +4,7 @@ #include #include #include + #include "internal_macros.h" namespace benchmark { @@ -39,6 +40,8 @@ inline std::string StrCat(Args&&... 
args) { std::vector StrSplit(const std::string& str, char delim); +// Disable lint checking for this block since it re-implements C functions. +// NOLINTBEGIN #ifdef BENCHMARK_STL_ANDROID_GNUSTL /* * GNU STL in Android NDK lacks support for some C++11 functions, including @@ -47,14 +50,15 @@ std::vector StrSplit(const std::string& str, char delim); * namespace, not std:: namespace. */ unsigned long stoul(const std::string& str, size_t* pos = nullptr, - int base = 10); + int base = 10); int stoi(const std::string& str, size_t* pos = nullptr, int base = 10); double stod(const std::string& str, size_t* pos = nullptr); #else -using std::stoul; -using std::stoi; -using std::stod; +using std::stod; // NOLINT(misc-unused-using-decls) +using std::stoi; // NOLINT(misc-unused-using-decls) +using std::stoul; // NOLINT(misc-unused-using-decls) #endif +// NOLINTEND } // end namespace benchmark diff --git a/third-party/benchmark/src/sysinfo.cc b/third-party/benchmark/src/sysinfo.cc index 112e9b4a0e2b..87dcfb466619 100644 --- a/third-party/benchmark/src/sysinfo.cc +++ b/third-party/benchmark/src/sysinfo.cc @@ -19,6 +19,7 @@ #undef StrCat // Don't let StrCat in string_util.h be renamed to lstrcatA #include #include + #include #else #include @@ -55,9 +56,9 @@ #include #include #include +#include #include #include -#include #include #include "check.h" @@ -135,7 +136,7 @@ struct ValueUnion { template std::array GetAsArray() { const int ArrSize = sizeof(T) * N; - CHECK_LE(ArrSize, Size); + BM_CHECK_LE(ArrSize, Size); std::array Arr; std::memcpy(Arr.data(), data(), ArrSize); return Arr; @@ -147,7 +148,7 @@ ValueUnion GetSysctlImp(std::string const& Name) { int mib[2]; mib[0] = CTL_HW; - if ((Name == "hw.ncpu") || (Name == "hw.cpuspeed")){ + if ((Name == "hw.ncpu") || (Name == "hw.cpuspeed")) { ValueUnion buff(sizeof(int)); if (Name == "hw.ncpu") { @@ -214,10 +215,9 @@ bool ReadFromFile(std::string const& fname, ArgT* arg) { CPUInfo::Scaling CpuScaling(int num_cpus) { // We don't have a valid CPU count, so don't even bother. if (num_cpus <= 0) return CPUInfo::Scaling::UNKNOWN; -#ifdef BENCHMARK_OS_QNX +#if defined(BENCHMARK_OS_QNX) return CPUInfo::Scaling::UNKNOWN; -#endif -#ifndef BENCHMARK_OS_WINDOWS +#elif !defined(BENCHMARK_OS_WINDOWS) // On Linux, the CPUfreq subsystem exposes CPU information as files on the // local file system. If reading the exported files fails, then we may not be // running on Linux, so we silently ignore all the read errors. 
@@ -225,11 +225,13 @@ CPUInfo::Scaling CpuScaling(int num_cpus) { for (int cpu = 0; cpu < num_cpus; ++cpu) { std::string governor_file = StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor"); - if (ReadFromFile(governor_file, &res) && res != "performance") return CPUInfo::Scaling::ENABLED; + if (ReadFromFile(governor_file, &res) && res != "performance") + return CPUInfo::Scaling::ENABLED; } return CPUInfo::Scaling::DISABLED; -#endif +#else return CPUInfo::Scaling::UNKNOWN; +#endif } int CountSetBitsInCPUMap(std::string Val) { @@ -366,29 +368,29 @@ std::vector GetCacheSizesWindows() { #elif BENCHMARK_OS_QNX std::vector GetCacheSizesQNX() { std::vector res; - struct cacheattr_entry *cache = SYSPAGE_ENTRY(cacheattr); + struct cacheattr_entry* cache = SYSPAGE_ENTRY(cacheattr); uint32_t const elsize = SYSPAGE_ELEMENT_SIZE(cacheattr); - int num = SYSPAGE_ENTRY_SIZE(cacheattr) / elsize ; - for(int i = 0; i < num; ++i ) { + int num = SYSPAGE_ENTRY_SIZE(cacheattr) / elsize; + for (int i = 0; i < num; ++i) { CPUInfo::CacheInfo info; - switch (cache->flags){ - case CACHE_FLAG_INSTR : + switch (cache->flags) { + case CACHE_FLAG_INSTR: info.type = "Instruction"; info.level = 1; break; - case CACHE_FLAG_DATA : + case CACHE_FLAG_DATA: info.type = "Data"; info.level = 1; break; - case CACHE_FLAG_UNIFIED : + case CACHE_FLAG_UNIFIED: info.type = "Unified"; info.level = 2; break; - case CACHE_FLAG_SHARED : + case CACHE_FLAG_SHARED: info.type = "Shared"; info.level = 3; break; - default : + default: continue; break; } @@ -416,24 +418,23 @@ std::vector GetCacheSizes() { std::string GetSystemName() { #if defined(BENCHMARK_OS_WINDOWS) std::string str; - const unsigned COUNT = MAX_COMPUTERNAME_LENGTH+1; - TCHAR hostname[COUNT] = {'\0'}; + const unsigned COUNT = MAX_COMPUTERNAME_LENGTH + 1; + TCHAR hostname[COUNT] = {'\0'}; DWORD DWCOUNT = COUNT; - if (!GetComputerName(hostname, &DWCOUNT)) - return std::string(""); + if (!GetComputerName(hostname, &DWCOUNT)) return std::string(""); #ifndef UNICODE str = std::string(hostname, DWCOUNT); #else - //Using wstring_convert, Is deprecated in C++17 + // Using wstring_convert, Is deprecated in C++17 using convert_type = std::codecvt_utf8; std::wstring_convert converter; std::wstring wStr(hostname, DWCOUNT); str = converter.to_bytes(wStr); #endif return str; -#else // defined(BENCHMARK_OS_WINDOWS) +#else // defined(BENCHMARK_OS_WINDOWS) #ifndef HOST_NAME_MAX -#ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac Doesnt have HOST_NAME_MAX defined +#ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac Doesnt have HOST_NAME_MAX defined #define HOST_NAME_MAX 64 #elif defined(BENCHMARK_OS_NACL) #define HOST_NAME_MAX 64 @@ -442,15 +443,15 @@ std::string GetSystemName() { #elif defined(BENCHMARK_OS_RTEMS) #define HOST_NAME_MAX 256 #else -#warning "HOST_NAME_MAX not defined. using 64" +#pragma message("HOST_NAME_MAX not defined. using 64") #define HOST_NAME_MAX 64 #endif -#endif // def HOST_NAME_MAX +#endif // def HOST_NAME_MAX char hostname[HOST_NAME_MAX]; int retVal = gethostname(hostname, HOST_NAME_MAX); if (retVal != 0) return std::string(""); return std::string(hostname); -#endif // Catch-all POSIX block. +#endif // Catch-all POSIX block. } int GetNumCPUs() { @@ -472,8 +473,7 @@ int GetNumCPUs() { // Returns -1 in case of a failure. 
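The CpuScaling rewrite above folds the QNX and Windows special cases into a single #if/#elif/#else chain while keeping the Linux behaviour: if any CPU's scaling_governor reads as something other than "performance", frequency scaling is treated as enabled; read failures are ignored and the function falls through to DISABLED. A standalone approximation of that check, with simplified error handling:

  #include <fstream>
  #include <iostream>
  #include <sstream>
  #include <string>

  enum class Scaling { UNKNOWN, ENABLED, DISABLED };

  Scaling CpuScalingSketch(int num_cpus) {
    if (num_cpus <= 0) return Scaling::UNKNOWN;
    for (int cpu = 0; cpu < num_cpus; ++cpu) {
      std::ostringstream governor_file;
      governor_file << "/sys/devices/system/cpu/cpu" << cpu
                    << "/cpufreq/scaling_governor";
      std::ifstream in(governor_file.str());
      std::string governor;
      if ((in >> governor) && governor != "performance")
        return Scaling::ENABLED;
    }
    return Scaling::DISABLED;
  }

  int main() {
    const Scaling s = CpuScalingSketch(4);
    std::cout << (s == Scaling::ENABLED ? "scaling enabled"
                                        : "scaling not detected")
              << "\n";
    return 0;
  }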
int NumCPU = sysconf(_SC_NPROCESSORS_ONLN); if (NumCPU < 0) { - fprintf(stderr, - "sysconf(_SC_NPROCESSORS_ONLN) failed with error: %s\n", + fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed with error: %s\n", strerror(errno)); } return NumCPU; @@ -496,7 +496,8 @@ int GetNumCPUs() { #if defined(__s390__) // s390 has another format in /proc/cpuinfo // it needs to be parsed differently - if (SplitIdx != std::string::npos) value = ln.substr(Key.size()+1,SplitIdx-Key.size()-1); + if (SplitIdx != std::string::npos) + value = ln.substr(Key.size() + 1, SplitIdx - Key.size() - 1); #else if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1); #endif @@ -543,7 +544,7 @@ double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) { // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as // well. if (ReadFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq) - // If CPU scaling is disabled, use the the *current* frequency. + // If CPU scaling is disabled, use the *current* frequency. // Note that we specifically don't want to read cpuinfo_cur_freq, // because it is only readable by root. || (scaling == CPUInfo::Scaling::DISABLED && @@ -642,13 +643,13 @@ double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) { "~MHz", nullptr, &data, &data_size))) return static_cast((int64_t)data * (int64_t)(1000 * 1000)); // was mhz -#elif defined (BENCHMARK_OS_SOLARIS) - kstat_ctl_t *kc = kstat_open(); +#elif defined(BENCHMARK_OS_SOLARIS) + kstat_ctl_t* kc = kstat_open(); if (!kc) { std::cerr << "failed to open /dev/kstat\n"; return -1; } - kstat_t *ksp = kstat_lookup(kc, (char*)"cpu_info", -1, (char*)"cpu_info0"); + kstat_t* ksp = kstat_lookup(kc, (char*)"cpu_info", -1, (char*)"cpu_info0"); if (!ksp) { std::cerr << "failed to lookup in /dev/kstat\n"; return -1; @@ -657,7 +658,7 @@ double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) { std::cerr << "failed to read from /dev/kstat\n"; return -1; } - kstat_named_t *knp = + kstat_named_t* knp = (kstat_named_t*)kstat_data_lookup(ksp, (char*)"current_clock_Hz"); if (!knp) { std::cerr << "failed to lookup data in /dev/kstat\n"; @@ -671,7 +672,7 @@ double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) { double clock_hz = knp->value.ui64; kstat_close(kc); return clock_hz; -#elif defined (BENCHMARK_OS_QNX) +#elif defined(BENCHMARK_OS_QNX) return static_cast((int64_t)(SYSPAGE_ENTRY(cpuinfo)->speed) * (int64_t)(1000 * 1000)); #endif diff --git a/third-party/benchmark/src/thread_manager.h b/third-party/benchmark/src/thread_manager.h index 28e2dd53aff2..468028508940 100644 --- a/third-party/benchmark/src/thread_manager.h +++ b/third-party/benchmark/src/thread_manager.h @@ -36,7 +36,6 @@ class ThreadManager { [this]() { return alive_threads_ == 0; }); } - public: struct Result { IterationCount iterations = 0; double real_time_used = 0; diff --git a/third-party/benchmark/src/thread_timer.h b/third-party/benchmark/src/thread_timer.h index 1703ca0d6f87..eb23f5956150 100644 --- a/third-party/benchmark/src/thread_timer.h +++ b/third-party/benchmark/src/thread_timer.h @@ -28,7 +28,7 @@ class ThreadTimer { // Called by each thread void StopTimer() { - CHECK(running_); + BM_CHECK(running_); running_ = false; real_time_used_ += ChronoClockNow() - start_real_time_; // Floating point error can result in the subtraction producing a negative @@ -44,19 +44,19 @@ class ThreadTimer { // REQUIRES: timer is not running double real_time_used() const { - CHECK(!running_); + BM_CHECK(!running_); return real_time_used_; } // REQUIRES: timer is not running double 
cpu_time_used() const { - CHECK(!running_); + BM_CHECK(!running_); return cpu_time_used_; } // REQUIRES: timer is not running double manual_time_used() const { - CHECK(!running_); + BM_CHECK(!running_); return manual_time_used_; } diff --git a/third-party/benchmark/src/timers.cc b/third-party/benchmark/src/timers.cc index af4767dff944..21d3db20da9e 100644 --- a/third-party/benchmark/src/timers.cc +++ b/third-party/benchmark/src/timers.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "timers.h" + #include "internal_macros.h" #ifdef BENCHMARK_OS_WINDOWS @@ -125,8 +126,8 @@ double ProcessCPUUsage() { // syncronous system calls in Emscripten. return emscripten_get_now() * 1e-3; #elif defined(CLOCK_PROCESS_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX) - // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See - // https://github.com/google/benchmark/pull/292 + // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. + // See https://github.com/google/benchmark/pull/292 struct timespec spec; if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0) return MakeTime(spec); @@ -149,13 +150,14 @@ double ThreadCPUUsage() { &user_time); return MakeTime(kernel_time, user_time); #elif defined(BENCHMARK_OS_MACOSX) - // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See - // https://github.com/google/benchmark/pull/292 + // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. + // See https://github.com/google/benchmark/pull/292 mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT; thread_basic_info_data_t info; mach_port_t thread = pthread_mach_thread_np(pthread_self()); - if (thread_info(thread, THREAD_BASIC_INFO, (thread_info_t)&info, &count) == - KERN_SUCCESS) { + if (thread_info(thread, THREAD_BASIC_INFO, + reinterpret_cast(&info), + &count) == KERN_SUCCESS) { return MakeTime(info); } DiagnoseAndExit("ThreadCPUUsage() failed when evaluating thread_info"); @@ -191,11 +193,14 @@ std::string LocalDateTimeString() { long int offset_minutes; char tz_offset_sign = '+'; // tz_offset is set in one of three ways: - // * strftime with %z - This either returns empty or the ISO 8601 time. The maximum length an + // * strftime with %z - This either returns empty or the ISO 8601 time. The + // maximum length an // ISO 8601 string can be is 7 (e.g. -03:30, plus trailing zero). - // * snprintf with %c%02li:%02li - The maximum length is 41 (one for %c, up to 19 for %02li, + // * snprintf with %c%02li:%02li - The maximum length is 41 (one for %c, up to + // 19 for %02li, // one for :, up to 19 %02li, plus trailing zero). - // * A fixed string of "-00:00". The maximum length is 7 (-00:00, plus trailing zero). + // * A fixed string of "-00:00". The maximum length is 7 (-00:00, plus + // trailing zero). // // Thus, the maximum size this needs to be is 41. char tz_offset[41]; @@ -203,10 +208,10 @@ std::string LocalDateTimeString() { char storage[128]; #if defined(BENCHMARK_OS_WINDOWS) - std::tm *timeinfo_p = ::localtime(&now); + std::tm* timeinfo_p = ::localtime(&now); #else std::tm timeinfo; - std::tm *timeinfo_p = &timeinfo; + std::tm* timeinfo_p = &timeinfo; ::localtime_r(&now, &timeinfo); #endif @@ -223,10 +228,11 @@ std::string LocalDateTimeString() { tz_offset_sign = '-'; } - tz_len = ::snprintf(tz_offset, sizeof(tz_offset), "%c%02li:%02li", - tz_offset_sign, offset_minutes / 100, offset_minutes % 100); - CHECK(tz_len == kTzOffsetLen); - ((void)tz_len); // Prevent unused variable warning in optimized build. 
+ tz_len = + ::snprintf(tz_offset, sizeof(tz_offset), "%c%02li:%02li", + tz_offset_sign, offset_minutes / 100, offset_minutes % 100); + BM_CHECK(tz_len == kTzOffsetLen); + ((void)tz_len); // Prevent unused variable warning in optimized build. } else { // Unknown offset. RFC3339 specifies that unknown local offsets should be // written as UTC time with -00:00 timezone. @@ -240,9 +246,9 @@ std::string LocalDateTimeString() { strncpy(tz_offset, "-00:00", kTzOffsetLen + 1); } - timestamp_len = std::strftime(storage, sizeof(storage), "%Y-%m-%dT%H:%M:%S", - timeinfo_p); - CHECK(timestamp_len == kTimestampLen); + timestamp_len = + std::strftime(storage, sizeof(storage), "%Y-%m-%dT%H:%M:%S", timeinfo_p); + BM_CHECK(timestamp_len == kTimestampLen); // Prevent unused variable warning in optimized build. ((void)kTimestampLen); diff --git a/third-party/benchmark/test/BUILD b/third-party/benchmark/test/BUILD index 1f27f99ede9f..df700a7a8100 100644 --- a/third-party/benchmark/test/BUILD +++ b/third-party/benchmark/test/BUILD @@ -21,6 +21,7 @@ TEST_ARGS = ["--benchmark_min_time=0.01"] PER_SRC_TEST_ARGS = ({ "user_counters_tabular_test.cc": ["--benchmark_counters_tabular=true"], "repetitions_test.cc": [" --benchmark_repetitions=3"], + "spec_arg_test.cc" : ["--benchmark_filter=BM_NotChosen"], }) load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") diff --git a/third-party/benchmark/test/CMakeLists.txt b/third-party/benchmark/test/CMakeLists.txt index 79cdf53b402c..162af53f80f7 100644 --- a/third-party/benchmark/test/CMakeLists.txt +++ b/third-party/benchmark/test/CMakeLists.txt @@ -56,6 +56,12 @@ endmacro(compile_output_test) compile_benchmark_test(benchmark_test) add_test(NAME benchmark COMMAND benchmark_test --benchmark_min_time=0.01) +compile_benchmark_test(spec_arg_test) +add_test(NAME spec_arg COMMAND spec_arg_test --benchmark_filter=BM_NotChosen) + +compile_benchmark_test(benchmark_setup_teardown_test) +add_test(NAME benchmark_setup_teardown COMMAND benchmark_setup_teardown_test) + compile_benchmark_test(filter_test) macro(add_filter_test name filter expect) add_test(NAME ${name} COMMAND filter_test --benchmark_min_time=0.01 --benchmark_filter=${filter} ${expect}) diff --git a/third-party/benchmark/test/args_product_test.cc b/third-party/benchmark/test/args_product_test.cc index 32a75d50dd9e..d44f391f7480 100644 --- a/third-party/benchmark/test/args_product_test.cc +++ b/third-party/benchmark/test/args_product_test.cc @@ -1,10 +1,10 @@ -#include "benchmark/benchmark.h" - #include #include #include #include +#include "benchmark/benchmark.h" + class ArgsProductFixture : public ::benchmark::Fixture { public: ArgsProductFixture() @@ -37,7 +37,7 @@ class ArgsProductFixture : public ::benchmark::Fixture { virtual ~ArgsProductFixture() { if (actualValues != expectedValues) { std::cout << "EXPECTED\n"; - for (auto v : expectedValues) { + for (const auto& v : expectedValues) { std::cout << "{"; for (int64_t iv : v) { std::cout << iv << ", "; @@ -45,7 +45,7 @@ class ArgsProductFixture : public ::benchmark::Fixture { std::cout << "}\n"; } std::cout << "ACTUAL\n"; - for (auto v : actualValues) { + for (const auto& v : actualValues) { std::cout << "{"; for (int64_t iv : v) { std::cout << iv << ", "; diff --git a/third-party/benchmark/test/basic_test.cc b/third-party/benchmark/test/basic_test.cc index 33642211e205..3a8fd42a8cdd 100644 --- a/third-party/benchmark/test/basic_test.cc +++ b/third-party/benchmark/test/basic_test.cc @@ -13,7 +13,7 @@ BENCHMARK(BM_empty)->ThreadPerCpu(); void 
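For orientation on the timers.cc changes above: LocalDateTimeString assembles an RFC 3339 local timestamp by combining strftime("%Y-%m-%dT%H:%M:%S") with a separately formatted "+hh:mm" offset, falling back to "-00:00" when the offset is unknown. The condensed sketch below relies on strftime's %z yielding "+hhmm" without the colon, which is an assumption about the platform; the library derives the offset differently:

  #include <cstdio>
  #include <ctime>

  int main() {
    std::time_t now = std::time(nullptr);
    std::tm* timeinfo_p = std::localtime(&now);

    char stamp[32];
    std::strftime(stamp, sizeof(stamp), "%Y-%m-%dT%H:%M:%S", timeinfo_p);

    char raw_offset[8];            // e.g. "+0200"
    char tz_offset[8] = "-00:00";  // RFC 3339 spelling for "offset unknown"
    if (std::strftime(raw_offset, sizeof(raw_offset), "%z", timeinfo_p) == 5) {
      // Insert the colon: "+0200" -> "+02:00".
      std::snprintf(tz_offset, sizeof(tz_offset), "%.3s:%.2s", raw_offset,
                    raw_offset + 3);
    }
    std::printf("%s%s\n", stamp, tz_offset);
    return 0;
  }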
BM_spin_empty(benchmark::State& state) { for (auto _ : state) { - for (int x = 0; x < state.range(0); ++x) { + for (auto x = 0; x < state.range(0); ++x) { benchmark::DoNotOptimize(x); } } @@ -22,11 +22,11 @@ BASIC_BENCHMARK_TEST(BM_spin_empty); BASIC_BENCHMARK_TEST(BM_spin_empty)->ThreadPerCpu(); void BM_spin_pause_before(benchmark::State& state) { - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } for (auto _ : state) { - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } } @@ -37,11 +37,11 @@ BASIC_BENCHMARK_TEST(BM_spin_pause_before)->ThreadPerCpu(); void BM_spin_pause_during(benchmark::State& state) { for (auto _ : state) { state.PauseTiming(); - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } state.ResumeTiming(); - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } } @@ -62,11 +62,11 @@ BENCHMARK(BM_pause_during)->UseRealTime()->ThreadPerCpu(); void BM_spin_pause_after(benchmark::State& state) { for (auto _ : state) { - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } } - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } } @@ -74,15 +74,15 @@ BASIC_BENCHMARK_TEST(BM_spin_pause_after); BASIC_BENCHMARK_TEST(BM_spin_pause_after)->ThreadPerCpu(); void BM_spin_pause_before_and_after(benchmark::State& state) { - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } for (auto _ : state) { - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } } - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } } @@ -96,7 +96,6 @@ void BM_empty_stop_start(benchmark::State& state) { BENCHMARK(BM_empty_stop_start); BENCHMARK(BM_empty_stop_start)->ThreadPerCpu(); - void BM_KeepRunning(benchmark::State& state) { benchmark::IterationCount iter_count = 0; assert(iter_count == state.iterations()); @@ -142,10 +141,39 @@ void BM_RangedFor(benchmark::State& state) { } BENCHMARK(BM_RangedFor); +#ifdef BENCHMARK_HAS_CXX11 +template +void BM_OneTemplateFunc(benchmark::State& state) { + auto arg = state.range(0); + T sum = 0; + for (auto _ : state) { + sum += arg; + } +} +BENCHMARK(BM_OneTemplateFunc)->Arg(1); +BENCHMARK(BM_OneTemplateFunc)->Arg(1); + +template +void BM_TwoTemplateFunc(benchmark::State& state) { + auto arg = state.range(0); + A sum = 0; + B prod = 1; + for (auto _ : state) { + sum += arg; + prod *= arg; + } +} +BENCHMARK(BM_TwoTemplateFunc)->Arg(1); +BENCHMARK(BM_TwoTemplateFunc)->Arg(1); + +#endif // BENCHMARK_HAS_CXX11 + // Ensure that StateIterator provides all the necessary typedefs required to // instantiate std::iterator_traits. 
-static_assert(std::is_same< - typename std::iterator_traits::value_type, - typename benchmark::State::StateIterator::value_type>::value, ""); +static_assert( + std::is_same::value_type, + typename benchmark::State::StateIterator::value_type>::value, + ""); BENCHMARK_MAIN(); diff --git a/third-party/benchmark/test/benchmark_random_interleaving_gtest.cc b/third-party/benchmark/test/benchmark_random_interleaving_gtest.cc index 8e28dab3f41d..d04befa8e381 100644 --- a/third-party/benchmark/test/benchmark_random_interleaving_gtest.cc +++ b/third-party/benchmark/test/benchmark_random_interleaving_gtest.cc @@ -8,11 +8,12 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" -DECLARE_bool(benchmark_enable_random_interleaving); -DECLARE_string(benchmark_filter); -DECLARE_int32(benchmark_repetitions); - namespace benchmark { + +BM_DECLARE_bool(benchmark_enable_random_interleaving); +BM_DECLARE_string(benchmark_filter); +BM_DECLARE_int32(benchmark_repetitions); + namespace internal { namespace { @@ -33,7 +34,7 @@ class EventQueue : public std::queue { } }; -static EventQueue* queue = new EventQueue; +EventQueue* queue = new EventQueue(); class NullReporter : public BenchmarkReporter { public: @@ -59,7 +60,7 @@ class BenchmarkTest : public testing::Test { } }; -static void BM_Match1(benchmark::State& state) { +void BM_Match1(benchmark::State& state) { const int64_t arg = state.range(0); for (auto _ : state) { @@ -110,8 +111,8 @@ TEST_F(BenchmarkTest, Match1WithRandomInterleaving) { std::vector interleaving; interleaving.push_back(queue->Get()); interleaving.push_back(queue->Get()); - element_count[interleaving[0].c_str()]++; - element_count[interleaving[1].c_str()]++; + element_count[interleaving[0]]++; + element_count[interleaving[1]]++; interleaving_count[StrFormat("%s,%s", interleaving[0].c_str(), interleaving[1].c_str())]++; } diff --git a/third-party/benchmark/test/benchmark_setup_teardown_test.cc b/third-party/benchmark/test/benchmark_setup_teardown_test.cc new file mode 100644 index 000000000000..efa34e15c129 --- /dev/null +++ b/third-party/benchmark/test/benchmark_setup_teardown_test.cc @@ -0,0 +1,157 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" + +// Test that Setup() and Teardown() are called exactly once +// for each benchmark run (single-threaded). +namespace single { +static int setup_call = 0; +static int teardown_call = 0; +} // namespace single +static void DoSetup1(const benchmark::State& state) { + ++single::setup_call; + + // Setup/Teardown should never be called with any thread_idx != 0. + assert(state.thread_index() == 0); +} + +static void DoTeardown1(const benchmark::State& state) { + ++single::teardown_call; + assert(state.thread_index() == 0); +} + +static void BM_with_setup(benchmark::State& state) { + for (auto s : state) { + } +} +BENCHMARK(BM_with_setup) + ->Arg(1) + ->Arg(3) + ->Arg(5) + ->Arg(7) + ->Iterations(100) + ->Setup(DoSetup1) + ->Teardown(DoTeardown1); + +// Test that Setup() and Teardown() are called once for each group of threads. 
+namespace concurrent { +static std::atomic setup_call(0); +static std::atomic teardown_call(0); +static std::atomic func_call(0); +} // namespace concurrent + +static void DoSetup2(const benchmark::State& state) { + concurrent::setup_call.fetch_add(1, std::memory_order_acquire); + assert(state.thread_index() == 0); +} + +static void DoTeardown2(const benchmark::State& state) { + concurrent::teardown_call.fetch_add(1, std::memory_order_acquire); + assert(state.thread_index() == 0); +} + +static void BM_concurrent(benchmark::State& state) { + for (auto s : state) { + } + concurrent::func_call.fetch_add(1, std::memory_order_acquire); +} + +BENCHMARK(BM_concurrent) + ->Setup(DoSetup2) + ->Teardown(DoTeardown2) + ->Iterations(100) + ->Threads(5) + ->Threads(10) + ->Threads(15); + +// Testing interaction with Fixture::Setup/Teardown +namespace fixture_interaction { +int setup = 0; +int fixture_setup = 0; +} // namespace fixture_interaction + +#define FIXTURE_BECHMARK_NAME MyFixture + +class FIXTURE_BECHMARK_NAME : public ::benchmark::Fixture { + public: + void SetUp(const ::benchmark::State&) BENCHMARK_OVERRIDE { + fixture_interaction::fixture_setup++; + } + + ~FIXTURE_BECHMARK_NAME() {} +}; + +BENCHMARK_F(FIXTURE_BECHMARK_NAME, BM_WithFixture)(benchmark::State& st) { + for (auto _ : st) { + } +} + +static void DoSetupWithFixture(const benchmark::State&) { + fixture_interaction::setup++; +} + +BENCHMARK_REGISTER_F(FIXTURE_BECHMARK_NAME, BM_WithFixture) + ->Arg(1) + ->Arg(3) + ->Arg(5) + ->Arg(7) + ->Setup(DoSetupWithFixture) + ->Repetitions(1) + ->Iterations(100); + +// Testing repetitions. +namespace repetitions { +int setup = 0; +} + +static void DoSetupWithRepetitions(const benchmark::State&) { + repetitions::setup++; +} +static void BM_WithRep(benchmark::State& state) { + for (auto _ : state) { + } +} + +BENCHMARK(BM_WithRep) + ->Arg(1) + ->Arg(3) + ->Arg(5) + ->Arg(7) + ->Setup(DoSetupWithRepetitions) + ->Iterations(100) + ->Repetitions(4); + +int main(int argc, char** argv) { + benchmark::Initialize(&argc, argv); + + size_t ret = benchmark::RunSpecifiedBenchmarks("."); + assert(ret > 0); + + // Setup/Teardown is called once for each arg group (1,3,5,7). + assert(single::setup_call == 4); + assert(single::teardown_call == 4); + + // 3 group of threads calling this function (3,5,10). + assert(concurrent::setup_call.load(std::memory_order_relaxed) == 3); + assert(concurrent::teardown_call.load(std::memory_order_relaxed) == 3); + assert((5 + 10 + 15) == + concurrent::func_call.load(std::memory_order_relaxed)); + + // Setup is called 4 times, once for each arg group (1,3,5,7) + assert(fixture_interaction::setup == 4); + // Fixture::Setup is called everytime the bm routine is run. + // The exact number is indeterministic, so we just assert that + // it's more than setup. + assert(fixture_interaction::fixture_setup > fixture_interaction::setup); + + // Setup is call once for each repetition * num_arg = 4 * 4 = 16. + assert(repetitions::setup == 16); + + return 0; +} diff --git a/third-party/benchmark/test/benchmark_test.cc b/third-party/benchmark/test/benchmark_test.cc index 3cd4f5565fa1..2906cdcde997 100644 --- a/third-party/benchmark/test/benchmark_test.cc +++ b/third-party/benchmark/test/benchmark_test.cc @@ -93,8 +93,9 @@ static void BM_SetInsert(benchmark::State& state) { state.SetBytesProcessed(state.iterations() * state.range(1) * sizeof(int)); } -// Test many inserts at once to reduce the total iterations needed. 
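The new benchmark_setup_teardown_test.cc above exercises the Setup()/Teardown() hooks added to benchmark registration: each hook runs on thread 0 only, once per run of the benchmark (so once per argument set and per repetition), wrapping the whole group of threads. A minimal usage sketch following the same conventions as that test:

  #include <cassert>

  #include "benchmark/benchmark.h"

  static int* g_table = nullptr;

  static void GlobalSetup(const benchmark::State& state) {
    assert(state.thread_index() == 0);  // hooks always run on thread 0
    g_table = new int[1024]();
  }

  static void GlobalTeardown(const benchmark::State&) {
    delete[] g_table;
    g_table = nullptr;
  }

  static void BM_UsesTable(benchmark::State& state) {
    for (auto _ : state) {
      benchmark::DoNotOptimize(g_table[42]);
    }
  }
  BENCHMARK(BM_UsesTable)
      ->Setup(GlobalSetup)
      ->Teardown(GlobalTeardown)
      ->Threads(2);

  BENCHMARK_MAIN();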
Otherwise, the slower, -// non-timed part of each iteration will make the benchmark take forever. +// Test many inserts at once to reduce the total iterations needed. Otherwise, +// the slower, non-timed part of each iteration will make the benchmark take +// forever. BENCHMARK(BM_SetInsert)->Ranges({{1 << 10, 8 << 10}, {128, 512}}); template Range(1, 1 << 20); static void BM_SetupTeardown(benchmark::State& state) { - if (state.thread_index == 0) { + if (state.thread_index() == 0) { // No need to lock test_vector_mu here as this is running single-threaded. test_vector = new std::vector(); } @@ -139,7 +140,7 @@ static void BM_SetupTeardown(benchmark::State& state) { test_vector->pop_back(); ++i; } - if (state.thread_index == 0) { + if (state.thread_index() == 0) { delete test_vector; } } @@ -156,11 +157,11 @@ BENCHMARK(BM_LongTest)->Range(1 << 16, 1 << 28); static void BM_ParallelMemset(benchmark::State& state) { int64_t size = state.range(0) / static_cast(sizeof(int)); - int thread_size = static_cast(size) / state.threads; - int from = thread_size * state.thread_index; + int thread_size = static_cast(size) / state.threads(); + int from = thread_size * state.thread_index(); int to = from + thread_size; - if (state.thread_index == 0) { + if (state.thread_index() == 0) { test_vector = new std::vector(static_cast(size)); } @@ -172,7 +173,7 @@ static void BM_ParallelMemset(benchmark::State& state) { } } - if (state.thread_index == 0) { + if (state.thread_index() == 0) { delete test_vector; } } @@ -214,7 +215,8 @@ BENCHMARK_CAPTURE(BM_with_args, string_and_pair_test, std::string("abc"), std::pair(42, 3.8)); void BM_non_template_args(benchmark::State& state, int, double) { - while(state.KeepRunning()) {} + while (state.KeepRunning()) { + } } BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0); @@ -223,14 +225,14 @@ BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0); static void BM_DenseThreadRanges(benchmark::State& st) { switch (st.range(0)) { case 1: - assert(st.threads == 1 || st.threads == 2 || st.threads == 3); + assert(st.threads() == 1 || st.threads() == 2 || st.threads() == 3); break; case 2: - assert(st.threads == 1 || st.threads == 3 || st.threads == 4); + assert(st.threads() == 1 || st.threads() == 3 || st.threads() == 4); break; case 3: - assert(st.threads == 5 || st.threads == 8 || st.threads == 11 || - st.threads == 14); + assert(st.threads() == 5 || st.threads() == 8 || st.threads() == 11 || + st.threads() == 14); break; default: assert(false && "Invalid test case number"); diff --git a/third-party/benchmark/test/clobber_memory_assembly_test.cc b/third-party/benchmark/test/clobber_memory_assembly_test.cc index f41911a39ce7..ab269130cd5c 100644 --- a/third-party/benchmark/test/clobber_memory_assembly_test.cc +++ b/third-party/benchmark/test/clobber_memory_assembly_test.cc @@ -9,7 +9,6 @@ extern "C" { extern int ExternInt; extern int ExternInt2; extern int ExternInt3; - } // CHECK-LABEL: test_basic: diff --git a/third-party/benchmark/test/complexity_test.cc b/third-party/benchmark/test/complexity_test.cc index 0de73c5722b5..1251cd44f52d 100644 --- a/third-party/benchmark/test/complexity_test.cc +++ b/third-party/benchmark/test/complexity_test.cc @@ -4,6 +4,7 @@ #include #include #include + #include "benchmark/benchmark.h" #include "output_test.h" @@ -12,9 +13,10 @@ namespace { #define ADD_COMPLEXITY_CASES(...) 
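The benchmark_test.cc updates above track an API change in this import: State::thread_index and State::threads are now accessor functions (state.thread_index(), state.threads()) rather than public data members. A minimal multi-threaded benchmark in the new spelling, using the same thread-0-allocates pattern as BM_ParallelMemset in the hunk above:

  #include <vector>

  #include "benchmark/benchmark.h"

  static std::vector<int>* shared_data = nullptr;

  static void BM_PerThreadSlice(benchmark::State& state) {
    const int slice = 1024;
    if (state.thread_index() == 0) {
      // Thread 0 sets up; the library's start barrier keeps the other threads
      // from iterating until this has run (same guarantee BM_ParallelMemset
      // relies on).
      shared_data = new std::vector<int>(slice * state.threads());
    }
    for (auto _ : state) {
      const int from = slice * state.thread_index();
      for (int i = from; i < from + slice; ++i) {
        benchmark::DoNotOptimize((*shared_data)[i] += 1);
      }
    }
    if (state.thread_index() == 0) {
      delete shared_data;
      shared_data = nullptr;
    }
  }
  BENCHMARK(BM_PerThreadSlice)->Threads(2)->Threads(4);

  BENCHMARK_MAIN();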
\ int CONCAT(dummy, __LINE__) = AddComplexityTest(__VA_ARGS__) -int AddComplexityTest(std::string test_name, std::string big_o_test_name, - std::string rms_test_name, std::string big_o, - int family_index) { +int AddComplexityTest(const std::string &test_name, + const std::string &big_o_test_name, + const std::string &rms_test_name, + const std::string &big_o, int family_index) { SetSubstitutions({{"%name", test_name}, {"%bigo_name", big_o_test_name}, {"%rms_name", rms_test_name}, @@ -36,6 +38,7 @@ int AddComplexityTest(std::string test_name, std::string big_o_test_name, {"\"repetitions\": %int,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"BigO\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"cpu_coefficient\": %float,$", MR_Next}, {"\"real_coefficient\": %float,$", MR_Next}, {"\"big_o\": \"%bigo\",$", MR_Next}, @@ -49,6 +52,7 @@ int AddComplexityTest(std::string test_name, std::string big_o_test_name, {"\"repetitions\": %int,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"RMS\",$", MR_Next}, + {"\"aggregate_unit\": \"percentage\",$", MR_Next}, {"\"rms\": %float$", MR_Next}, {"}", MR_Next}}); AddCases(TC_CSVOut, {{"^\"%bigo_name\",,%float,%float,%bigo,,,,,$"}, @@ -63,7 +67,7 @@ int AddComplexityTest(std::string test_name, std::string big_o_test_name, // --------------------------- Testing BigO O(1) --------------------------- // // ========================================================================= // -void BM_Complexity_O1(benchmark::State& state) { +void BM_Complexity_O1(benchmark::State &state) { for (auto _ : state) { for (int i = 0; i < 1024; ++i) { benchmark::DoNotOptimize(&i); @@ -112,7 +116,7 @@ std::vector ConstructRandomVector(int64_t size) { return v; } -void BM_Complexity_O_N(benchmark::State& state) { +void BM_Complexity_O_N(benchmark::State &state) { auto v = ConstructRandomVector(state.range(0)); // Test worst case scenario (item not in vector) const int64_t item_not_in_vector = state.range(0) * 2; @@ -154,7 +158,7 @@ ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name, // ------------------------- Testing BigO O(N*lgN) ------------------------- // // ========================================================================= // -static void BM_Complexity_O_N_log_N(benchmark::State& state) { +static void BM_Complexity_O_N_log_N(benchmark::State &state) { auto v = ConstructRandomVector(state.range(0)); for (auto _ : state) { std::sort(v.begin(), v.end()); @@ -197,7 +201,7 @@ ADD_COMPLEXITY_CASES(n_lg_n_test_name, big_o_n_lg_n_test_name, // -------- Testing formatting of Complexity with captured args ------------ // // ========================================================================= // -void BM_ComplexityCaptureArgs(benchmark::State& state, int n) { +void BM_ComplexityCaptureArgs(benchmark::State &state, int n) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero benchmark::DoNotOptimize(state.iterations()); diff --git a/third-party/benchmark/test/cxx03_test.cc b/third-party/benchmark/test/cxx03_test.cc index c4c9a52273e3..9711c1bd4a9b 100644 --- a/third-party/benchmark/test/cxx03_test.cc +++ b/third-party/benchmark/test/cxx03_test.cc @@ -44,8 +44,7 @@ BENCHMARK_TEMPLATE(BM_template1, long); BENCHMARK_TEMPLATE1(BM_template1, int); template -struct BM_Fixture : public ::benchmark::Fixture { -}; +struct BM_Fixture : public ::benchmark::Fixture {}; BENCHMARK_TEMPLATE_F(BM_Fixture, BM_template1, long)(benchmark::State& state) { BM_empty(state); @@ 
-55,8 +54,8 @@ BENCHMARK_TEMPLATE1_F(BM_Fixture, BM_template2, int)(benchmark::State& state) { } void BM_counters(benchmark::State& state) { - BM_empty(state); - state.counters["Foo"] = 2; + BM_empty(state); + state.counters["Foo"] = 2; } BENCHMARK(BM_counters); diff --git a/third-party/benchmark/test/diagnostics_test.cc b/third-party/benchmark/test/diagnostics_test.cc index dd64a3365531..c54d5b0d708a 100644 --- a/third-party/benchmark/test/diagnostics_test.cc +++ b/third-party/benchmark/test/diagnostics_test.cc @@ -26,7 +26,8 @@ void TestHandler() { } void try_invalid_pause_resume(benchmark::State& state) { -#if !defined(TEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS) && !defined(TEST_HAS_NO_EXCEPTIONS) +#if !defined(TEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS) && \ + !defined(TEST_HAS_NO_EXCEPTIONS) try { state.PauseTiming(); std::abort(); @@ -57,13 +58,12 @@ void BM_diagnostic_test(benchmark::State& state) { } BENCHMARK(BM_diagnostic_test); - void BM_diagnostic_test_keep_running(benchmark::State& state) { static bool called_once = false; if (called_once == false) try_invalid_pause_resume(state); - while(state.KeepRunning()) { + while (state.KeepRunning()) { benchmark::DoNotOptimize(state.iterations()); } diff --git a/third-party/benchmark/test/display_aggregates_only_test.cc b/third-party/benchmark/test/display_aggregates_only_test.cc index 3c36d3f03c11..6ad65e7f516a 100644 --- a/third-party/benchmark/test/display_aggregates_only_test.cc +++ b/third-party/benchmark/test/display_aggregates_only_test.cc @@ -19,21 +19,23 @@ BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->DisplayAggregatesOnly(); int main(int argc, char* argv[]) { const std::string output = GetFileReporterOutput(argc, argv); - if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 6 || + if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 7 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3\"") != 3 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_mean\"") != 1 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_median\"") != 1 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"") != - 1) { - std::cout << "Precondition mismatch. Expected to only find 6 " + 1 || + SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"") != 1) { + std::cout << "Precondition mismatch. 
Expected to only find 8 " "occurrences of \"BM_SummaryRepeat/repeats:3\" substring:\n" "\"name\": \"BM_SummaryRepeat/repeats:3\", " "\"name\": \"BM_SummaryRepeat/repeats:3\", " "\"name\": \"BM_SummaryRepeat/repeats:3\", " "\"name\": \"BM_SummaryRepeat/repeats:3_mean\", " "\"name\": \"BM_SummaryRepeat/repeats:3_median\", " - "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"\nThe entire " + "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\", " + "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"\nThe entire " "output:\n"; std::cout << output; return 1; diff --git a/third-party/benchmark/test/donotoptimize_assembly_test.cc b/third-party/benchmark/test/donotoptimize_assembly_test.cc index d4b0bab70e77..2e86a51e2234 100644 --- a/third-party/benchmark/test/donotoptimize_assembly_test.cc +++ b/third-party/benchmark/test/donotoptimize_assembly_test.cc @@ -15,7 +15,7 @@ inline int Add42(int x) { return x + 42; } struct NotTriviallyCopyable { NotTriviallyCopyable(); explicit NotTriviallyCopyable(int x) : value(x) {} - NotTriviallyCopyable(NotTriviallyCopyable const&); + NotTriviallyCopyable(NotTriviallyCopyable const &); int value; }; @@ -23,7 +23,6 @@ struct Large { int value; int data[2]; }; - } // CHECK-LABEL: test_with_rvalue: extern "C" void test_with_rvalue() { @@ -118,8 +117,7 @@ extern "C" int test_div_by_two(int input) { // CHECK-LABEL: test_inc_integer: extern "C" int test_inc_integer() { int x = 0; - for (int i=0; i < 5; ++i) - benchmark::DoNotOptimize(++x); + for (int i = 0; i < 5; ++i) benchmark::DoNotOptimize(++x); // CHECK: movl $1, [[DEST:.*]] // CHECK: {{(addl \$1,|incl)}} [[DEST]] // CHECK: {{(addl \$1,|incl)}} [[DEST]] @@ -147,7 +145,7 @@ extern "C" void test_pointer_const_lvalue() { // CHECK-CLANG: movq %rax, -{{[0-9]+}}(%[[REG:[a-z]+]]) // CHECK: ret int x = 42; - int * const xp = &x; + int *const xp = &x; benchmark::DoNotOptimize(xp); } diff --git a/third-party/benchmark/test/donotoptimize_test.cc b/third-party/benchmark/test/donotoptimize_test.cc index 2ce92d1c72be..c321f156a1e0 100644 --- a/third-party/benchmark/test/donotoptimize_test.cc +++ b/third-party/benchmark/test/donotoptimize_test.cc @@ -1,27 +1,28 @@ -#include "benchmark/benchmark.h" - #include +#include "benchmark/benchmark.h" + namespace { #if defined(__GNUC__) std::uint64_t double_up(const std::uint64_t x) __attribute__((const)); #endif std::uint64_t double_up(const std::uint64_t x) { return x * 2; } -} +} // namespace // Using DoNotOptimize on types like BitRef seem to cause a lot of problems // with the inline assembly on both GCC and Clang. 
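The higher substring counts above come from the new "cv" aggregate that repeated runs now emit alongside mean, median and stddev; unlike those three, its aggregate_unit is "percentage" rather than "time". As a rough sketch of the statistic itself (my own illustration, not the library's implementation), cv is the coefficient of variation, the sample standard deviation divided by the mean, which is what the StatisticsCV expectations later in this patch encode:

#include <cmath>
#include <numeric>
#include <vector>

// Illustration only: coefficient of variation over repetition timings.
// Assumes at least two samples.
double SketchCV(const std::vector<double>& v) {
  const double mean = std::accumulate(v.begin(), v.end(), 0.0) / v.size();
  double sq_dev = 0.0;
  for (double x : v) sq_dev += (x - mean) * (x - mean);
  const double stddev = std::sqrt(sq_dev / (v.size() - 1));  // sample stddev
  return stddev / mean;  // e.g. {1, 2, 3} -> 1.0 / 2.0, matching the gtest later in this patch
}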
struct BitRef { int index; - unsigned char &byte; + unsigned char& byte; -public: + public: static BitRef Make() { static unsigned char arr[2] = {}; BitRef b(1, arr[0]); return b; } -private: + + private: BitRef(int i, unsigned char& b) : index(i), byte(b) {} }; diff --git a/third-party/benchmark/test/filter_test.cc b/third-party/benchmark/test/filter_test.cc index 1c198913b36a..a567de2dd58e 100644 --- a/third-party/benchmark/test/filter_test.cc +++ b/third-party/benchmark/test/filter_test.cc @@ -70,7 +70,7 @@ static void BM_FooBa(benchmark::State& state) { } BENCHMARK(BM_FooBa); -int main(int argc, char **argv) { +int main(int argc, char** argv) { bool list_only = false; for (int i = 0; i < argc; ++i) list_only |= std::string(argv[i]).find("--benchmark_list_tests") != diff --git a/third-party/benchmark/test/fixture_test.cc b/third-party/benchmark/test/fixture_test.cc index eba0a42d9cb0..af650dbd0661 100644 --- a/third-party/benchmark/test/fixture_test.cc +++ b/third-party/benchmark/test/fixture_test.cc @@ -1,22 +1,22 @@ -#include "benchmark/benchmark.h" - #include #include +#include "benchmark/benchmark.h" + #define FIXTURE_BECHMARK_NAME MyFixture class FIXTURE_BECHMARK_NAME : public ::benchmark::Fixture { public: void SetUp(const ::benchmark::State& state) BENCHMARK_OVERRIDE { - if (state.thread_index == 0) { + if (state.thread_index() == 0) { assert(data.get() == nullptr); data.reset(new int(42)); } } void TearDown(const ::benchmark::State& state) BENCHMARK_OVERRIDE { - if (state.thread_index == 0) { + if (state.thread_index() == 0) { assert(data.get() != nullptr); data.reset(); } @@ -27,7 +27,7 @@ class FIXTURE_BECHMARK_NAME : public ::benchmark::Fixture { std::unique_ptr data; }; -BENCHMARK_F(FIXTURE_BECHMARK_NAME, Foo)(benchmark::State &st) { +BENCHMARK_F(FIXTURE_BECHMARK_NAME, Foo)(benchmark::State& st) { assert(data.get() != nullptr); assert(*data == 42); for (auto _ : st) { @@ -35,7 +35,7 @@ BENCHMARK_F(FIXTURE_BECHMARK_NAME, Foo)(benchmark::State &st) { } BENCHMARK_DEFINE_F(FIXTURE_BECHMARK_NAME, Bar)(benchmark::State& st) { - if (st.thread_index == 0) { + if (st.thread_index() == 0) { assert(data.get() != nullptr); assert(*data == 42); } diff --git a/third-party/benchmark/test/internal_threading_test.cc b/third-party/benchmark/test/internal_threading_test.cc index 039d7c14a8c4..62b5b955a9f5 100644 --- a/third-party/benchmark/test/internal_threading_test.cc +++ b/third-party/benchmark/test/internal_threading_test.cc @@ -3,6 +3,7 @@ #include #include + #include "../src/timers.h" #include "benchmark/benchmark.h" #include "output_test.h" diff --git a/third-party/benchmark/test/map_test.cc b/third-party/benchmark/test/map_test.cc index 86391b36016f..509613457c1b 100644 --- a/third-party/benchmark/test/map_test.cc +++ b/third-party/benchmark/test/map_test.cc @@ -1,8 +1,8 @@ -#include "benchmark/benchmark.h" - #include #include +#include "benchmark/benchmark.h" + namespace { std::map ConstructRandomMap(int size) { diff --git a/third-party/benchmark/test/multiple_ranges_test.cc b/third-party/benchmark/test/multiple_ranges_test.cc index 6b61f3af47bb..7618c4da0892 100644 --- a/third-party/benchmark/test/multiple_ranges_test.cc +++ b/third-party/benchmark/test/multiple_ranges_test.cc @@ -1,10 +1,10 @@ -#include "benchmark/benchmark.h" - #include #include #include #include +#include "benchmark/benchmark.h" + class MultipleRangesFixture : public ::benchmark::Fixture { public: MultipleRangesFixture() @@ -42,7 +42,7 @@ class MultipleRangesFixture : public ::benchmark::Fixture { virtual 
~MultipleRangesFixture() { if (actualValues != expectedValues) { std::cout << "EXPECTED\n"; - for (auto v : expectedValues) { + for (const auto& v : expectedValues) { std::cout << "{"; for (int64_t iv : v) { std::cout << iv << ", "; @@ -50,7 +50,7 @@ class MultipleRangesFixture : public ::benchmark::Fixture { std::cout << "}\n"; } std::cout << "ACTUAL\n"; - for (auto v : actualValues) { + for (const auto& v : actualValues) { std::cout << "{"; for (int64_t iv : v) { std::cout << iv << ", "; diff --git a/third-party/benchmark/test/options_test.cc b/third-party/benchmark/test/options_test.cc index 9f9a78667c9e..d424d40b9518 100644 --- a/third-party/benchmark/test/options_test.cc +++ b/third-party/benchmark/test/options_test.cc @@ -1,7 +1,8 @@ -#include "benchmark/benchmark.h" #include #include +#include "benchmark/benchmark.h" + #if defined(NDEBUG) #undef NDEBUG #endif @@ -65,11 +66,9 @@ void BM_explicit_iteration_count(benchmark::State& state) { // Test that the requested iteration count is respected. assert(state.max_iterations == 42); size_t actual_iterations = 0; - for (auto _ : state) - ++actual_iterations; + for (auto _ : state) ++actual_iterations; assert(state.iterations() == state.max_iterations); assert(state.iterations() == 42); - } BENCHMARK(BM_explicit_iteration_count)->Iterations(42); diff --git a/third-party/benchmark/test/output_test.h b/third-party/benchmark/test/output_test.h index 15368f9b6830..c6ff8ef2d303 100644 --- a/third-party/benchmark/test/output_test.h +++ b/third-party/benchmark/test/output_test.h @@ -85,7 +85,7 @@ std::string GetFileReporterOutput(int argc, char* argv[]); struct Results; typedef std::function ResultsCheckFn; -size_t AddChecker(const char* bm_name_pattern, ResultsCheckFn fn); +size_t AddChecker(const char* bm_name_pattern, const ResultsCheckFn& fn); // Class holding the results of a benchmark. // It is passed in calls to checker functions. @@ -113,9 +113,7 @@ struct Results { return NumIterations() * GetTime(kRealTime); } // get the cpu_time duration of the benchmark in seconds - double DurationCPUTime() const { - return NumIterations() * GetTime(kCpuTime); - } + double DurationCPUTime() const { return NumIterations() * GetTime(kCpuTime); } // get the string for a result by name, or nullptr if the name // is not found @@ -143,12 +141,12 @@ struct Results { template T Results::GetAs(const char* entry_name) const { auto* sv = Get(entry_name); - CHECK(sv != nullptr && !sv->empty()); + BM_CHECK(sv != nullptr && !sv->empty()); std::stringstream ss; ss << *sv; T out; ss >> out; - CHECK(!ss.fail()); + BM_CHECK(!ss.fail()); return out; } @@ -159,7 +157,7 @@ T Results::GetAs(const char* entry_name) const { // clang-format off #define CHECK_RESULT_VALUE_IMPL(entry, getfn, var_type, var_name, relationship, value) \ - CONCAT(CHECK_, relationship) \ + CONCAT(BM_CHECK_, relationship) \ (entry.getfn< var_type >(var_name), (value)) << "\n" \ << __FILE__ << ":" << __LINE__ << ": " << (entry).name << ":\n" \ << __FILE__ << ":" << __LINE__ << ": " \ @@ -170,7 +168,7 @@ T Results::GetAs(const char* entry_name) const { // check with tolerance. eps_factor is the tolerance window, which is // interpreted relative to value (eg, 0.1 means 10% of value). 
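To make the eps_factor comment above concrete (an illustration under the assumption that relationship EQ expands to the float check with (eps_factor) * (value) as its tolerance, which is what the macro below passes): the check accepts any actual value whose distance from the expected value stays within that window.

#include <cmath>

// Sketch of the relative-tolerance rule; e.g. value = 150.0 with
// eps_factor = 0.1 accepts anything in roughly [135.0, 165.0]. Whether the
// boundary itself passes depends on the strictness of the underlying
// BM_CHECK_FLOAT_* comparison.
inline bool WithinRelativeTolerance(double actual, double value,
                                    double eps_factor) {
  return std::fabs(actual - value) <= eps_factor * value;
}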
#define CHECK_FLOAT_RESULT_VALUE_IMPL(entry, getfn, var_type, var_name, relationship, value, eps_factor) \ - CONCAT(CHECK_FLOAT_, relationship) \ + CONCAT(BM_CHECK_FLOAT_, relationship) \ (entry.getfn< var_type >(var_name), (value), (eps_factor) * (value)) << "\n" \ << __FILE__ << ":" << __LINE__ << ": " << (entry).name << ":\n" \ << __FILE__ << ":" << __LINE__ << ": " \ diff --git a/third-party/benchmark/test/output_test_helper.cc b/third-party/benchmark/test/output_test_helper.cc index b8ef1205744a..81584cbf778b 100644 --- a/third-party/benchmark/test/output_test_helper.cc +++ b/third-party/benchmark/test/output_test_helper.cc @@ -10,6 +10,7 @@ #include "../src/benchmark_api_internal.h" #include "../src/check.h" // NOTE: check.h is for internal use only! +#include "../src/log.h" // NOTE: log.h is for internal use only #include "../src/re.h" // NOTE: re.h is for internal use only #include "output_test.h" @@ -40,14 +41,17 @@ SubMap& GetSubstitutions() { // clang-format off static std::string safe_dec_re = "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"; static std::string time_re = "([0-9]+[.])?[0-9]+"; + static std::string percentage_re = "[0-9]+[.][0-9]{2}"; static SubMap map = { {"%float", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"}, // human-readable float {"%hrfloat", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?[kMGTPEZYmunpfazy]?"}, + {"%percentage", percentage_re}, {"%int", "[ ]*[0-9]+"}, {" %s ", "[ ]+"}, {"%time", "[ ]*" + time_re + "[ ]+ns"}, {"%console_report", "[ ]*" + time_re + "[ ]+ns [ ]*" + time_re + "[ ]+ns [ ]*[0-9]+"}, + {"%console_percentage_report", "[ ]*" + percentage_re + "[ ]+% [ ]*" + percentage_re + "[ ]+% [ ]*[0-9]+"}, {"%console_us_report", "[ ]*" + time_re + "[ ]+us [ ]*" + time_re + "[ ]+us [ ]*[0-9]+"}, {"%console_ms_report", "[ ]*" + time_re + "[ ]+ms [ ]*" + time_re + "[ ]+ms [ ]*[0-9]+"}, {"%console_s_report", "[ ]*" + time_re + "[ ]+s [ ]*" + time_re + "[ ]+s [ ]*[0-9]+"}, @@ -94,27 +98,27 @@ void CheckCase(std::stringstream& remaining_output, TestCase const& TC, bool on_first = true; std::string line; while (remaining_output.eof() == false) { - CHECK(remaining_output.good()); + BM_CHECK(remaining_output.good()); std::getline(remaining_output, line); if (on_first) { first_line = line; on_first = false; } for (const auto& NC : not_checks) { - CHECK(!NC.regex->Match(line)) + BM_CHECK(!NC.regex->Match(line)) << "Unexpected match for line \"" << line << "\" for MR_Not regex \"" << NC.regex_str << "\"" << "\n actual regex string \"" << TC.substituted_regex << "\"" << "\n started matching near: " << first_line; } if (TC.regex->Match(line)) return; - CHECK(TC.match_rule != MR_Next) + BM_CHECK(TC.match_rule != MR_Next) << "Expected line \"" << line << "\" to match regex \"" << TC.regex_str << "\"" << "\n actual regex string \"" << TC.substituted_regex << "\"" << "\n started matching near: " << first_line; } - CHECK(remaining_output.eof() == false) + BM_CHECK(remaining_output.eof() == false) << "End of output reached before match for regex \"" << TC.regex_str << "\" was found" << "\n actual regex string \"" << TC.substituted_regex << "\"" @@ -137,14 +141,14 @@ void CheckCases(TestCaseList const& checks, std::stringstream& output) { class TestReporter : public benchmark::BenchmarkReporter { public: TestReporter(std::vector reps) - : reporters_(reps) {} + : reporters_(std::move(reps)) {} virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE { bool last_ret = false; bool first = true; for (auto rep : reporters_) { bool new_ret = rep->ReportContext(context); - CHECK(first || 
new_ret == last_ret) + BM_CHECK(first || new_ret == last_ret) << "Reports return different values for ReportContext"; first = false; last_ret = new_ret; @@ -179,7 +183,7 @@ class ResultsChecker { public: struct PatternAndFn : public TestCase { // reusing TestCase for its regexes PatternAndFn(const std::string& rx, ResultsCheckFn fn_) - : TestCase(rx), fn(fn_) {} + : TestCase(rx), fn(std::move(fn_)) {} ResultsCheckFn fn; }; @@ -187,7 +191,7 @@ class ResultsChecker { std::vector results; std::vector field_names; - void Add(const std::string& entry_pattern, ResultsCheckFn fn); + void Add(const std::string& entry_pattern, const ResultsCheckFn& fn); void CheckResults(std::stringstream& output); @@ -206,7 +210,8 @@ ResultsChecker& GetResultsChecker() { } // add a results checker for a benchmark -void ResultsChecker::Add(const std::string& entry_pattern, ResultsCheckFn fn) { +void ResultsChecker::Add(const std::string& entry_pattern, + const ResultsCheckFn& fn) { check_patterns.emplace_back(entry_pattern, fn); } @@ -226,7 +231,7 @@ void ResultsChecker::CheckResults(std::stringstream& output) { std::string line; bool on_first = true; while (output.eof() == false) { - CHECK(output.good()); + BM_CHECK(output.good()); std::getline(output, line); if (on_first) { SetHeader_(line); // this is important @@ -237,18 +242,18 @@ void ResultsChecker::CheckResults(std::stringstream& output) { } // finally we can call the subscribed check functions for (const auto& p : check_patterns) { - VLOG(2) << "--------------------------------\n"; - VLOG(2) << "checking for benchmarks matching " << p.regex_str << "...\n"; + BM_VLOG(2) << "--------------------------------\n"; + BM_VLOG(2) << "checking for benchmarks matching " << p.regex_str << "...\n"; for (const auto& r : results) { if (!p.regex->Match(r.name)) { - VLOG(2) << p.regex_str << " is not matched by " << r.name << "\n"; + BM_VLOG(2) << p.regex_str << " is not matched by " << r.name << "\n"; continue; } else { - VLOG(2) << p.regex_str << " is matched by " << r.name << "\n"; + BM_VLOG(2) << p.regex_str << " is matched by " << r.name << "\n"; } - VLOG(1) << "Checking results of " << r.name << ": ... \n"; + BM_VLOG(1) << "Checking results of " << r.name << ": ... 
\n"; p.fn(r); - VLOG(1) << "Checking results of " << r.name << ": OK.\n"; + BM_VLOG(1) << "Checking results of " << r.name << ": OK.\n"; } } } @@ -261,9 +266,9 @@ void ResultsChecker::SetHeader_(const std::string& csv_header) { // set the values for a benchmark void ResultsChecker::SetValues_(const std::string& entry_csv_line) { if (entry_csv_line.empty()) return; // some lines are empty - CHECK(!field_names.empty()); + BM_CHECK(!field_names.empty()); auto vals = SplitCsv_(entry_csv_line); - CHECK_EQ(vals.size(), field_names.size()); + BM_CHECK_EQ(vals.size(), field_names.size()); results.emplace_back(vals[0]); // vals[0] is the benchmark name auto& entry = results.back(); for (size_t i = 1, e = vals.size(); i < e; ++i) { @@ -278,7 +283,7 @@ std::vector ResultsChecker::SplitCsv_(const std::string& line) { if (!field_names.empty()) out.reserve(field_names.size()); size_t prev = 0, pos = line.find_first_of(','), curr = pos; while (pos != line.npos) { - CHECK(curr > 0); + BM_CHECK(curr > 0); if (line[prev] == '"') ++prev; if (line[curr - 1] == '"') --curr; out.push_back(line.substr(prev, curr - prev)); @@ -295,7 +300,7 @@ std::vector ResultsChecker::SplitCsv_(const std::string& line) { } // end namespace internal -size_t AddChecker(const char* bm_name, ResultsCheckFn fn) { +size_t AddChecker(const char* bm_name, const ResultsCheckFn& fn) { auto& rc = internal::GetResultsChecker(); rc.Add(bm_name, fn); return rc.results.size(); @@ -309,20 +314,18 @@ int Results::NumThreads() const { ss << name.substr(pos + 9, end); int num = 1; ss >> num; - CHECK(!ss.fail()); + BM_CHECK(!ss.fail()); return num; } -double Results::NumIterations() const { - return GetAs("iterations"); -} +double Results::NumIterations() const { return GetAs("iterations"); } double Results::GetTime(BenchmarkTime which) const { - CHECK(which == kCpuTime || which == kRealTime); + BM_CHECK(which == kCpuTime || which == kRealTime); const char* which_str = which == kCpuTime ? "cpu_time" : "real_time"; double val = GetAs(which_str); auto unit = Get("time_unit"); - CHECK(unit); + BM_CHECK(unit); if (*unit == "ns") { return val * 1.e-9; } else if (*unit == "us") { @@ -332,7 +335,7 @@ double Results::GetTime(BenchmarkTime which) const { } else if (*unit == "s") { return val; } else { - CHECK(1 == 0) << "unknown time unit: " << *unit; + BM_CHECK(1 == 0) << "unknown time unit: " << *unit; return 0; } } @@ -348,10 +351,10 @@ TestCase::TestCase(std::string re, int rule) regex(std::make_shared()) { std::string err_str; regex->Init(substituted_regex, &err_str); - CHECK(err_str.empty()) << "Could not construct regex \"" << substituted_regex - << "\"" - << "\n originally \"" << regex_str << "\"" - << "\n got error: " << err_str; + BM_CHECK(err_str.empty()) + << "Could not construct regex \"" << substituted_regex << "\"" + << "\n originally \"" << regex_str << "\"" + << "\n got error: " << err_str; } int AddCases(TestCaseID ID, std::initializer_list il) { @@ -380,10 +383,8 @@ int SetSubstitutions( // Disable deprecated warnings temporarily because we need to reference // CSVReporter but don't want to trigger -Werror=-Wdeprecated-declarations -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif +BENCHMARK_DISABLE_DEPRECATED_WARNING + void RunOutputTests(int argc, char* argv[]) { using internal::GetTestCaseList; benchmark::Initialize(&argc, argv); @@ -438,13 +439,11 @@ void RunOutputTests(int argc, char* argv[]) { // the checks to subscribees. 
auto& csv = TestCases[2]; // would use == but gcc spits a warning - CHECK(std::strcmp(csv.name, "CSVReporter") == 0); + BM_CHECK(std::strcmp(csv.name, "CSVReporter") == 0); internal::GetResultsChecker().CheckResults(csv.out_stream); } -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif +BENCHMARK_RESTORE_DEPRECATED_WARNING int SubstrCnt(const std::string& haystack, const std::string& pat) { if (pat.length() == 0) return 0; @@ -468,9 +467,8 @@ static char RandomHexChar() { static std::string GetRandomFileName() { std::string model = "test.%%%%%%"; - for (auto & ch : model) { - if (ch == '%') - ch = RandomHexChar(); + for (auto& ch : model) { + if (ch == '%') ch = RandomHexChar(); } return model; } @@ -487,8 +485,7 @@ static std::string GetTempFileName() { int retries = 3; while (--retries) { std::string name = GetRandomFileName(); - if (!FileExists(name)) - return name; + if (!FileExists(name)) return name; } std::cerr << "Failed to create unique temporary file name" << std::endl; std::abort(); diff --git a/third-party/benchmark/test/perf_counters_gtest.cc b/third-party/benchmark/test/perf_counters_gtest.cc index 2a2868a71536..3eac62463bc5 100644 --- a/third-party/benchmark/test/perf_counters_gtest.cc +++ b/third-party/benchmark/test/perf_counters_gtest.cc @@ -5,7 +5,7 @@ #ifndef GTEST_SKIP struct MsgHandler { - void operator=(std::ostream&){} + void operator=(std::ostream&) {} }; #define GTEST_SKIP() return MsgHandler() = std::cout #endif @@ -103,10 +103,10 @@ size_t do_work() { void measure(size_t threadcount, PerfCounterValues* values1, PerfCounterValues* values2) { - CHECK_NE(values1, nullptr); - CHECK_NE(values2, nullptr); + BM_CHECK_NE(values1, nullptr); + BM_CHECK_NE(values2, nullptr); std::vector threads(threadcount); - auto work = [&]() { CHECK(do_work() > 1000); }; + auto work = [&]() { BM_CHECK(do_work() > 1000); }; // We need to first set up the counters, then start the threads, so the // threads would inherit the counters. 
But later, we need to first destroy the diff --git a/third-party/benchmark/test/perf_counters_test.cc b/third-party/benchmark/test/perf_counters_test.cc index d6e0284d4d4b..3017a452fe27 100644 --- a/third-party/benchmark/test/perf_counters_test.cc +++ b/third-party/benchmark/test/perf_counters_test.cc @@ -5,7 +5,7 @@ #include "benchmark/benchmark.h" #include "output_test.h" -void BM_Simple(benchmark::State& state) { +static void BM_Simple(benchmark::State& state) { for (auto _ : state) { benchmark::DoNotOptimize(state.iterations()); } @@ -13,7 +13,7 @@ void BM_Simple(benchmark::State& state) { BENCHMARK(BM_Simple); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Simple\",$"}}); -void CheckSimple(Results const& e) { +static void CheckSimple(Results const& e) { CHECK_COUNTER_VALUE(e, double, "CYCLES", GT, 0); CHECK_COUNTER_VALUE(e, double, "BRANCHES", GT, 0.0); } diff --git a/third-party/benchmark/test/register_benchmark_test.cc b/third-party/benchmark/test/register_benchmark_test.cc index c027eabacae0..602405b67e8d 100644 --- a/third-party/benchmark/test/register_benchmark_test.cc +++ b/third-party/benchmark/test/register_benchmark_test.cc @@ -30,13 +30,13 @@ struct TestCase { void CheckRun(Run const& run) const { // clang-format off - CHECK(name == run.benchmark_name()) << "expected " << name << " got " + BM_CHECK(name == run.benchmark_name()) << "expected " << name << " got " << run.benchmark_name(); if (label) { - CHECK(run.report_label == label) << "expected " << label << " got " + BM_CHECK(run.report_label == label) << "expected " << label << " got " << run.report_label; } else { - CHECK(run.report_label == ""); + BM_CHECK(run.report_label.empty()); } // clang-format on } @@ -45,7 +45,7 @@ struct TestCase { std::vector ExpectedResults; int AddCases(std::initializer_list const& v) { - for (auto N : v) { + for (const auto& N : v) { ExpectedResults.push_back(N); } return 0; diff --git a/third-party/benchmark/test/repetitions_test.cc b/third-party/benchmark/test/repetitions_test.cc index f93de502a35a..569777d5f933 100644 --- a/third-party/benchmark/test/repetitions_test.cc +++ b/third-party/benchmark/test/repetitions_test.cc @@ -6,7 +6,7 @@ // ------------------------ Testing Basic Output --------------------------- // // ========================================================================= // -void BM_ExplicitRepetitions(benchmark::State& state) { +static void BM_ExplicitRepetitions(benchmark::State& state) { for (auto _ : state) { } } @@ -59,6 +59,7 @@ ADD_CASES(TC_JSONOut, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, @@ -73,6 +74,7 @@ ADD_CASES(TC_JSONOut, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, @@ -87,6 +89,7 @@ ADD_CASES(TC_JSONOut, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, @@ -105,7 +108,7 @@ ADD_CASES(TC_CSVOut, // ------------------------ Testing Basic Output --------------------------- // // 
========================================================================= // -void BM_ImplicitRepetitions(benchmark::State& state) { +static void BM_ImplicitRepetitions(benchmark::State& state) { for (auto _ : state) { } } @@ -164,6 +167,7 @@ ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions_mean\",$"}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, @@ -177,6 +181,7 @@ ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions_median\",$"}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, @@ -190,6 +195,7 @@ ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions_stddev\",$"}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, diff --git a/third-party/benchmark/test/report_aggregates_only_test.cc b/third-party/benchmark/test/report_aggregates_only_test.cc index 9646b9be534d..47da50358885 100644 --- a/third-party/benchmark/test/report_aggregates_only_test.cc +++ b/third-party/benchmark/test/report_aggregates_only_test.cc @@ -19,17 +19,19 @@ BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->ReportAggregatesOnly(); int main(int argc, char* argv[]) { const std::string output = GetFileReporterOutput(argc, argv); - if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 3 || + if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 4 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_mean\"") != 1 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_median\"") != 1 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"") != - 1) { - std::cout << "Precondition mismatch. Expected to only find three " + 1 || + SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"") != 1) { + std::cout << "Precondition mismatch. 
Expected to only find four " "occurrences of \"BM_SummaryRepeat/repeats:3\" substring:\n" "\"name\": \"BM_SummaryRepeat/repeats:3_mean\", " "\"name\": \"BM_SummaryRepeat/repeats:3_median\", " - "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"\nThe entire " + "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\", " + "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"\nThe entire " "output:\n"; std::cout << output; return 1; diff --git a/third-party/benchmark/test/reporter_output_test.cc b/third-party/benchmark/test/reporter_output_test.cc index 989eb48ecc81..2b6e6543dd2e 100644 --- a/third-party/benchmark/test/reporter_output_test.cc +++ b/third-party/benchmark/test/reporter_output_test.cc @@ -1,5 +1,6 @@ #undef NDEBUG +#include #include #include "benchmark/benchmark.h" @@ -454,6 +455,7 @@ ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:2\",$"}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:2_median\",$"}, {"\"family_index\": 15,$", MR_Next}, @@ -463,6 +465,7 @@ ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:2\",$"}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:2_stddev\",$"}, {"\"family_index\": 15,$", MR_Next}, @@ -472,6 +475,7 @@ ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:2\",$"}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:2\",%csv_report$"}, {"^\"BM_Repeat/repeats:2\",%csv_report$"}, @@ -519,6 +523,7 @@ ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:3\",$"}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:3_median\",$"}, {"\"family_index\": 16,$", MR_Next}, @@ -528,6 +533,7 @@ ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:3\",$"}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:3_stddev\",$"}, {"\"family_index\": 16,$", MR_Next}, @@ -537,6 +543,7 @@ ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:3\",$"}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:3\",%csv_report$"}, {"^\"BM_Repeat/repeats:3\",%csv_report$"}, @@ -594,6 +601,7 @@ ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:4\",$"}, {"\"repetitions\": 4,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 4,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:4_median\",$"}, {"\"family_index\": 17,$", MR_Next}, @@ -603,6 +611,7 @@ ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:4\",$"}, {"\"repetitions\": 4,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + 
{"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 4,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:4_stddev\",$"}, {"\"family_index\": 17,$", MR_Next}, @@ -612,6 +621,7 @@ ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:4\",$"}, {"\"repetitions\": 4,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 4,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:4\",%csv_report$"}, {"^\"BM_Repeat/repeats:4\",%csv_report$"}, @@ -661,6 +671,7 @@ ADD_CASES(TC_JSONOut, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"name\": \"BM_SummaryRepeat/repeats:3_median\",$"}, {"\"family_index\": 19,$", MR_Next}, @@ -670,6 +681,7 @@ ADD_CASES(TC_JSONOut, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"name\": \"BM_SummaryRepeat/repeats:3_stddev\",$"}, {"\"family_index\": 19,$", MR_Next}, @@ -679,6 +691,7 @@ ADD_CASES(TC_JSONOut, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{".*BM_SummaryRepeat/repeats:3 ", MR_Not}, {"^\"BM_SummaryRepeat/repeats:3_mean\",%csv_report$"}, @@ -709,6 +722,7 @@ ADD_CASES(TC_JSONOut, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"name\": \"BM_SummaryDisplay/repeats:2_median\",$"}, {"\"family_index\": 20,$", MR_Next}, @@ -718,6 +732,7 @@ ADD_CASES(TC_JSONOut, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"name\": \"BM_SummaryDisplay/repeats:2_stddev\",$"}, {"\"family_index\": 20,$", MR_Next}, @@ -727,6 +742,7 @@ ADD_CASES(TC_JSONOut, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{".*BM_SummaryDisplay/repeats:2 ", MR_Not}, @@ -761,6 +777,7 @@ ADD_CASES(TC_JSONOut, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"time_unit\": \"us\",?$"}, {"\"name\": \"BM_RepeatTimeUnit/repeats:3_median\",$"}, @@ -771,6 +788,7 @@ ADD_CASES(TC_JSONOut, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"time_unit\": \"us\",?$"}, {"\"name\": \"BM_RepeatTimeUnit/repeats:3_stddev\",$"}, @@ -781,6 +799,7 @@ ADD_CASES(TC_JSONOut, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"time_unit\": \"us\",?$"}}); ADD_CASES(TC_CSVOut, @@ -869,6 +888,7 @@ ADD_CASES( {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, 
{"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_median\",$"}, @@ -880,6 +900,7 @@ ADD_CASES( {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_stddev\",$"}, @@ -891,6 +912,7 @@ ADD_CASES( {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_\",$"}, @@ -902,6 +924,7 @@ ADD_CASES( {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}}); ADD_CASES( @@ -916,6 +939,154 @@ ADD_CASES( "manual_time_stddev\",%csv_report$"}, {"^\"BM_UserStats/iterations:5/repeats:3/manual_time_\",%csv_report$"}}); +// ========================================================================= // +// ------------- Testing relative standard deviation statistics ------------ // +// ========================================================================= // + +const auto UserPercentStatistics = [](const std::vector&) { + return 1. / 100.; +}; +void BM_UserPercentStats(benchmark::State& state) { + for (auto _ : state) { + state.SetIterationTime(150 / 10e8); + } +} +// clang-format off +BENCHMARK(BM_UserPercentStats) + ->Repetitions(3) + ->Iterations(5) + ->UseManualTime() + ->Unit(benchmark::TimeUnit::kNanosecond) + ->ComputeStatistics("", UserPercentStatistics, benchmark::StatisticUnit::kPercentage); +// clang-format on + +// check that UserPercent-provided stats is calculated, and is after the +// default-ones empty string as name is intentional, it would sort before +// anything else +ADD_CASES(TC_ConsoleOut, + {{"^BM_UserPercentStats/iterations:5/repeats:3/manual_time [ " + "]* 150 ns %time [ ]*5$"}, + {"^BM_UserPercentStats/iterations:5/repeats:3/manual_time [ " + "]* 150 ns %time [ ]*5$"}, + {"^BM_UserPercentStats/iterations:5/repeats:3/manual_time [ " + "]* 150 ns %time [ ]*5$"}, + {"^BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time_mean [ ]* 150 ns %time [ ]*3$"}, + {"^BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time_median [ ]* 150 ns %time [ ]*3$"}, + {"^BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time_stddev [ ]* 0.000 ns %time [ ]*3$"}, + {"^BM_UserPercentStats/iterations:5/repeats:3/manual_time_ " + "[ ]* 1.00 % [ ]* 1.00 %[ ]*3$"}}); +ADD_CASES( + TC_JSONOut, + {{"\"name\": \"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$"}, + {"\"family_index\": 23,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": 5,$", MR_Next}, + {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, + {"\"name\": 
\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$"}, + {"\"family_index\": 23,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 1,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": 5,$", MR_Next}, + {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, + {"\"name\": \"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$"}, + {"\"family_index\": 23,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": 5,$", MR_Next}, + {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, + {"\"name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time_mean\",$"}, + {"\"family_index\": 23,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}, + {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, + {"\"name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time_median\",$"}, + {"\"family_index\": 23,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}, + {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, + {"\"name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time_stddev\",$"}, + {"\"family_index\": 23,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time_\",$"}, + {"\"family_index\": 23,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"\",$", MR_Next}, + {"\"aggregate_unit\": \"percentage\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}, + {"\"real_time\": 1\\.(0)*e-(0)*2,$", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time\",%csv_report$"}, + {"^\"BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time\",%csv_report$"}, + {"^\"BM_UserPercentStats/iterations:5/repeats:3/" + 
"manual_time\",%csv_report$"}, + {"^\"BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time_mean\",%csv_report$"}, + {"^\"BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time_median\",%csv_report$"}, + {"^\"BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time_stddev\",%csv_report$"}, + {"^\"BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time_\",%csv_report$"}}); + // ========================================================================= // // ------------------------- Testing StrEscape JSON ------------------------ // // ========================================================================= // diff --git a/third-party/benchmark/test/skip_with_error_test.cc b/third-party/benchmark/test/skip_with_error_test.cc index 827966e9dfe3..026d47913350 100644 --- a/third-party/benchmark/test/skip_with_error_test.cc +++ b/third-party/benchmark/test/skip_with_error_test.cc @@ -33,14 +33,14 @@ struct TestCase { typedef benchmark::BenchmarkReporter::Run Run; void CheckRun(Run const& run) const { - CHECK(name == run.benchmark_name()) + BM_CHECK(name == run.benchmark_name()) << "expected " << name << " got " << run.benchmark_name(); - CHECK(error_occurred == run.error_occurred); - CHECK(error_message == run.error_message); + BM_CHECK(error_occurred == run.error_occurred); + BM_CHECK(error_message == run.error_message); if (error_occurred) { - // CHECK(run.iterations == 0); + // BM_CHECK(run.iterations == 0); } else { - CHECK(run.iterations != 0); + BM_CHECK(run.iterations != 0); } } }; @@ -97,7 +97,7 @@ ADD_CASES("BM_error_before_running_range_for", {{"", true, "error message"}}); void BM_error_during_running(benchmark::State& state) { int first_iter = true; while (state.KeepRunning()) { - if (state.range(0) == 1 && state.thread_index <= (state.threads / 2)) { + if (state.range(0) == 1 && state.thread_index() <= (state.threads() / 2)) { assert(first_iter); first_iter = false; state.SkipWithError("error message"); @@ -119,12 +119,13 @@ ADD_CASES("BM_error_during_running", {{"/1/threads:1", true, "error message"}, void BM_error_during_running_ranged_for(benchmark::State& state) { assert(state.max_iterations > 3 && "test requires at least a few iterations"); - int first_iter = true; + bool first_iter = true; // NOTE: Users should not write the for loop explicitly. for (auto It = state.begin(), End = state.end(); It != End; ++It) { if (state.range(0) == 1) { assert(first_iter); first_iter = false; + (void)first_iter; state.SkipWithError("error message"); // Test the unfortunate but documented behavior that the ranged-for loop // doesn't automatically terminate when SkipWithError is set. 
@@ -142,7 +143,7 @@ void BM_error_after_running(benchmark::State& state) { for (auto _ : state) { benchmark::DoNotOptimize(state.iterations()); } - if (state.thread_index <= (state.threads / 2)) + if (state.thread_index() <= (state.threads() / 2)) state.SkipWithError("error message"); } BENCHMARK(BM_error_after_running)->ThreadRange(1, 8); @@ -154,7 +155,7 @@ ADD_CASES("BM_error_after_running", {{"/threads:1", true, "error message"}, void BM_error_while_paused(benchmark::State& state) { bool first_iter = true; while (state.KeepRunning()) { - if (state.range(0) == 1 && state.thread_index <= (state.threads / 2)) { + if (state.range(0) == 1 && state.thread_index() <= (state.threads() / 2)) { assert(first_iter); first_iter = false; state.PauseTiming(); diff --git a/third-party/benchmark/test/spec_arg_test.cc b/third-party/benchmark/test/spec_arg_test.cc new file mode 100644 index 000000000000..043db1be47a2 --- /dev/null +++ b/third-party/benchmark/test/spec_arg_test.cc @@ -0,0 +1,95 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" + +// Tests that we can override benchmark-spec value from FLAGS_benchmark_filter +// with argument to RunSpecifiedBenchmarks(...). + +namespace { + +class TestReporter : public benchmark::ConsoleReporter { + public: + virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE { + return ConsoleReporter::ReportContext(context); + }; + + virtual void ReportRuns(const std::vector& report) BENCHMARK_OVERRIDE { + assert(report.size() == 1); + matched_functions.push_back(report[0].run_name.function_name); + ConsoleReporter::ReportRuns(report); + }; + + TestReporter() {} + + virtual ~TestReporter() {} + + const std::vector& GetMatchedFunctions() const { + return matched_functions; + } + + private: + std::vector matched_functions; +}; + +} // end namespace + +static void BM_NotChosen(benchmark::State& state) { + assert(false && "SHOULD NOT BE CALLED"); + for (auto _ : state) { + } +} +BENCHMARK(BM_NotChosen); + +static void BM_Chosen(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_Chosen); + +int main(int argc, char** argv) { + const std::string flag = "BM_NotChosen"; + + // Verify that argv specify --benchmark_filter=BM_NotChosen. + bool found = false; + for (int i = 0; i < argc; ++i) { + if (strcmp("--benchmark_filter=BM_NotChosen", argv[i]) == 0) { + found = true; + break; + } + } + assert(found); + + benchmark::Initialize(&argc, argv); + + // Check that the current flag value is reported accurately via the + // GetBenchmarkFilter() function. + if (flag != benchmark::GetBenchmarkFilter()) { + std::cerr + << "Seeing different value for flags. 
GetBenchmarkFilter() returns [" + << benchmark::GetBenchmarkFilter() << "] expected flag=[" << flag + << "]\n"; + return 1; + } + TestReporter test_reporter; + const char* const spec = "BM_Chosen"; + const size_t returned_count = + benchmark::RunSpecifiedBenchmarks(&test_reporter, spec); + assert(returned_count == 1); + const std::vector matched_functions = + test_reporter.GetMatchedFunctions(); + assert(matched_functions.size() == 1); + if (strcmp(spec, matched_functions.front().c_str()) != 0) { + std::cerr << "Expected benchmark [" << spec << "] to run, but got [" + << matched_functions.front() << "]\n"; + return 2; + } + return 0; +} diff --git a/third-party/benchmark/test/statistics_gtest.cc b/third-party/benchmark/test/statistics_gtest.cc index 3ddc72dd7ac6..1de2d87d4ba5 100644 --- a/third-party/benchmark/test/statistics_gtest.cc +++ b/third-party/benchmark/test/statistics_gtest.cc @@ -25,4 +25,11 @@ TEST(StatisticsTest, StdDev) { 1.151086443322134); } +TEST(StatisticsTest, CV) { + EXPECT_DOUBLE_EQ(benchmark::StatisticsCV({101, 101, 101, 101}), 0.0); + EXPECT_DOUBLE_EQ(benchmark::StatisticsCV({1, 2, 3}), 1. / 2.); + EXPECT_DOUBLE_EQ(benchmark::StatisticsCV({2.5, 2.4, 3.3, 4.2, 5.1}), + 0.32888184094918121); +} + } // end namespace diff --git a/third-party/benchmark/test/string_util_gtest.cc b/third-party/benchmark/test/string_util_gtest.cc index c7061b409e91..698f2d43eb88 100644 --- a/third-party/benchmark/test/string_util_gtest.cc +++ b/third-party/benchmark/test/string_util_gtest.cc @@ -2,8 +2,8 @@ // statistics_test - Unit tests for src/statistics.cc //===---------------------------------------------------------------------===// -#include "../src/string_util.h" #include "../src/internal_macros.h" +#include "../src/string_util.h" #include "gtest/gtest.h" namespace { @@ -32,7 +32,8 @@ TEST(StringUtilTest, stoul) { #elif ULONG_MAX == 0xFFFFFFFFFFFFFFFFul { size_t pos = 0; - EXPECT_EQ(0xFFFFFFFFFFFFFFFFul, benchmark::stoul("18446744073709551615", &pos)); + EXPECT_EQ(0xFFFFFFFFFFFFFFFFul, + benchmark::stoul("18446744073709551615", &pos)); EXPECT_EQ(20ul, pos); } #endif @@ -62,91 +63,81 @@ TEST(StringUtilTest, stoul) { EXPECT_EQ(4ul, pos); } #ifndef BENCHMARK_HAS_NO_EXCEPTIONS - { - ASSERT_THROW(benchmark::stoul("this is a test"), std::invalid_argument); - } + { ASSERT_THROW(benchmark::stoul("this is a test"), std::invalid_argument); } #endif } -TEST(StringUtilTest, stoi) { - { - size_t pos = 0; - EXPECT_EQ(0, benchmark::stoi("0", &pos)); - EXPECT_EQ(1ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(-17, benchmark::stoi("-17", &pos)); - EXPECT_EQ(3ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(1357, benchmark::stoi("1357", &pos)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(10, benchmark::stoi("1010", &pos, 2)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(520, benchmark::stoi("1010", &pos, 8)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(1010, benchmark::stoi("1010", &pos, 10)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(4112, benchmark::stoi("1010", &pos, 16)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(0xBEEF, benchmark::stoi("BEEF", &pos, 16)); - EXPECT_EQ(4ul, pos); - } +TEST(StringUtilTest, stoi){{size_t pos = 0; +EXPECT_EQ(0, benchmark::stoi("0", &pos)); +EXPECT_EQ(1ul, pos); +} // namespace +{ + size_t pos = 0; + EXPECT_EQ(-17, benchmark::stoi("-17", &pos)); + EXPECT_EQ(3ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(1357, benchmark::stoi("1357", &pos)); + EXPECT_EQ(4ul, pos); +} +{ + size_t pos 
= 0; + EXPECT_EQ(10, benchmark::stoi("1010", &pos, 2)); + EXPECT_EQ(4ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(520, benchmark::stoi("1010", &pos, 8)); + EXPECT_EQ(4ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(1010, benchmark::stoi("1010", &pos, 10)); + EXPECT_EQ(4ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(4112, benchmark::stoi("1010", &pos, 16)); + EXPECT_EQ(4ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(0xBEEF, benchmark::stoi("BEEF", &pos, 16)); + EXPECT_EQ(4ul, pos); +} #ifndef BENCHMARK_HAS_NO_EXCEPTIONS - { - ASSERT_THROW(benchmark::stoi("this is a test"), std::invalid_argument); - } +{ ASSERT_THROW(benchmark::stoi("this is a test"), std::invalid_argument); } #endif } -TEST(StringUtilTest, stod) { - { - size_t pos = 0; - EXPECT_EQ(0.0, benchmark::stod("0", &pos)); - EXPECT_EQ(1ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(-84.0, benchmark::stod("-84", &pos)); - EXPECT_EQ(3ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(1234.0, benchmark::stod("1234", &pos)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(1.5, benchmark::stod("1.5", &pos)); - EXPECT_EQ(3ul, pos); - } - { - size_t pos = 0; - /* Note: exactly representable as double */ - EXPECT_EQ(-1.25e+9, benchmark::stod("-1.25e+9", &pos)); - EXPECT_EQ(8ul, pos); - } +TEST(StringUtilTest, stod){{size_t pos = 0; +EXPECT_EQ(0.0, benchmark::stod("0", &pos)); +EXPECT_EQ(1ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(-84.0, benchmark::stod("-84", &pos)); + EXPECT_EQ(3ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(1234.0, benchmark::stod("1234", &pos)); + EXPECT_EQ(4ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(1.5, benchmark::stod("1.5", &pos)); + EXPECT_EQ(3ul, pos); +} +{ + size_t pos = 0; + /* Note: exactly representable as double */ + EXPECT_EQ(-1.25e+9, benchmark::stod("-1.25e+9", &pos)); + EXPECT_EQ(8ul, pos); +} #ifndef BENCHMARK_HAS_NO_EXCEPTIONS - { - ASSERT_THROW(benchmark::stod("this is a test"), std::invalid_argument); - } +{ ASSERT_THROW(benchmark::stod("this is a test"), std::invalid_argument); } #endif } diff --git a/third-party/benchmark/test/templated_fixture_test.cc b/third-party/benchmark/test/templated_fixture_test.cc index fe9865cc776f..af239c3a725e 100644 --- a/third-party/benchmark/test/templated_fixture_test.cc +++ b/third-party/benchmark/test/templated_fixture_test.cc @@ -1,9 +1,9 @@ -#include "benchmark/benchmark.h" - #include #include +#include "benchmark/benchmark.h" + template class MyFixture : public ::benchmark::Fixture { public: diff --git a/third-party/benchmark/test/user_counters_tabular_test.cc b/third-party/benchmark/test/user_counters_tabular_test.cc index 421f27b5cb8b..45ac043d5193 100644 --- a/third-party/benchmark/test/user_counters_tabular_test.cc +++ b/third-party/benchmark/test/user_counters_tabular_test.cc @@ -18,12 +18,14 @@ ADD_CASES(TC_ConsoleOut, {"^BM_Counters_Tabular/repeats:2/threads:1 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, {"^BM_Counters_Tabular/repeats:2/threads:1_mean %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, {"^BM_Counters_Tabular/repeats:2/threads:1_median %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_Counters_Tabular/repeats:2/threads:1_stddev %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:1_stddev %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ 
]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:1_cv %console_percentage_report [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*%$", MR_Next}, {"^BM_Counters_Tabular/repeats:2/threads:2 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, {"^BM_Counters_Tabular/repeats:2/threads:2 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, {"^BM_Counters_Tabular/repeats:2/threads:2_mean %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, {"^BM_Counters_Tabular/repeats:2/threads:2_median %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_Counters_Tabular/repeats:2/threads:2_stddev %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:2_stddev %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:2_cv %console_percentage_report [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*%$", MR_Next}, {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next}, {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next}, {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next}, @@ -125,6 +127,7 @@ ADD_CASES(TC_JSONOut, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, @@ -146,6 +149,7 @@ ADD_CASES(TC_JSONOut, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, @@ -167,6 +171,29 @@ ADD_CASES(TC_JSONOut, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_cv\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"cv\",$", MR_Next}, + {"\"aggregate_unit\": 
\"percentage\",$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, @@ -231,6 +258,7 @@ ADD_CASES(TC_JSONOut, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 2,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, @@ -252,6 +280,29 @@ ADD_CASES(TC_JSONOut, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 2,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2_cv\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 1,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 2,$", MR_Next}, + {"\"aggregate_name\": \"cv\",$", MR_Next}, + {"\"aggregate_unit\": \"percentage\",$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, @@ -278,6 +329,9 @@ ADD_CASES(TC_CSVOut, ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Tabular/repeats:2/threads:1_stddev\",%csv_report," "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:1_cv\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Tabular/repeats:2/threads:2\",%csv_report," "%float,%float,%float,%float,%float,%float$"}}); @@ -293,6 +347,9 @@ ADD_CASES(TC_CSVOut, ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Tabular/repeats:2/threads:2_stddev\",%csv_report," "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:2_cv\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); // VS2013 does not allow this function to be passed as a lambda argument // to CHECK_BENCHMARK_RESULTS() void CheckTabular(Results const& e) { diff --git a/third-party/benchmark/test/user_counters_test.cc b/third-party/benchmark/test/user_counters_test.cc index 377bb32ca948..1cc74552a1bd 100644 --- a/third-party/benchmark/test/user_counters_test.cc +++ b/third-party/benchmark/test/user_counters_test.cc @@ -26,7 +26,7 @@ void BM_Counters_Simple(benchmark::State& state) { for (auto _ : state) { } state.counters["foo"] = 1; - state.counters["bar"] = 2 * (double)state.iterations(); + state.counters["bar"] = 2 * static_cast(state.iterations()); } BENCHMARK(BM_Counters_Simple); ADD_CASES(TC_ConsoleOut, diff --git a/third-party/benchmark/test/user_counters_thousands_test.cc b/third-party/benchmark/test/user_counters_thousands_test.cc index bbe194264ed4..a42683b32fa7 100644 --- a/third-party/benchmark/test/user_counters_thousands_test.cc +++ b/third-party/benchmark/test/user_counters_thousands_test.cc @@ -96,6 +96,7 @@ ADD_CASES(TC_JSONOut, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, 
+ {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, @@ -115,6 +116,7 @@ ADD_CASES(TC_JSONOut, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, @@ -134,6 +136,7 @@ ADD_CASES(TC_JSONOut, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, diff --git a/third-party/benchmark/tools/gbench/Inputs/test4_run0.json b/third-party/benchmark/tools/gbench/Inputs/test4_run0.json new file mode 100644 index 000000000000..54cf127585cc --- /dev/null +++ b/third-party/benchmark/tools/gbench/Inputs/test4_run0.json @@ -0,0 +1,21 @@ +{ + "context": { + "date": "2016-08-02 17:44:46", + "num_cpus": 4, + "mhz_per_cpu": 4228, + "cpu_scaling_enabled": false, + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "whocares", + "run_type": "aggregate", + "aggregate_name": "zz", + "aggregate_unit": "percentage", + "iterations": 1000, + "real_time": 0.01, + "cpu_time": 0.10, + "time_unit": "ns" + } + ] +} diff --git a/third-party/benchmark/tools/gbench/Inputs/test4_run1.json b/third-party/benchmark/tools/gbench/Inputs/test4_run1.json new file mode 100644 index 000000000000..25d56050c9f4 --- /dev/null +++ b/third-party/benchmark/tools/gbench/Inputs/test4_run1.json @@ -0,0 +1,21 @@ +{ + "context": { + "date": "2016-08-02 17:44:46", + "num_cpus": 4, + "mhz_per_cpu": 4228, + "cpu_scaling_enabled": false, + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "whocares", + "run_type": "aggregate", + "aggregate_name": "zz", + "aggregate_unit": "percentage", + "iterations": 1000, + "real_time": 0.005, + "cpu_time": 0.15, + "time_unit": "ns" + } + ] +} diff --git a/third-party/benchmark/tools/gbench/report.py b/third-party/benchmark/tools/gbench/report.py index 6bea82f6bf7b..4c798baf69f2 100644 --- a/third-party/benchmark/tools/gbench/report.py +++ b/third-party/benchmark/tools/gbench/report.py @@ -7,7 +7,9 @@ import re import copy import random -from scipy.stats import mannwhitneyu +from scipy.stats import mannwhitneyu, gmean +from numpy import array +from pandas import Timedelta class BenchmarkColor(object): @@ -150,6 +152,30 @@ def partition_benchmarks(json1, json2): return partitions +def get_timedelta_field_as_seconds(benchmark, field_name): + """ + Get value of field_name field of benchmark, which is time with time unit + time_unit, as time in seconds. + """ + time_unit = benchmark['time_unit'] if 'time_unit' in benchmark else 's' + dt = Timedelta(benchmark[field_name], time_unit) + return dt / Timedelta(1, 's') + + +def calculate_geomean(json): + """ + Extract all real/cpu times from all the benchmarks as seconds, + and calculate their geomean. + """ + times = [] + for benchmark in json['benchmarks']: + if 'run_type' in benchmark and benchmark['run_type'] == 'aggregate': + continue + times.append([get_timedelta_field_as_seconds(benchmark, 'real_time'), + get_timedelta_field_as_seconds(benchmark, 'cpu_time')]) + return gmean(times) if times else array([]) + + def extract_field(partition, field_name): # The count of elements may be different. 
We want *all* of them. lhs = [x[field_name] for x in partition[0]] @@ -174,6 +200,7 @@ def calc_utest(timings_cpu, timings_time): return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue + def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True): def get_utest_color(pval): return BC_FAIL if pval >= utest_alpha else BC_OKGREEN @@ -242,7 +269,8 @@ def get_difference_report( if utest: timings_cpu = extract_field(partition, 'cpu_time') timings_time = extract_field(partition, 'real_time') - have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(timings_cpu, timings_time) + have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest( + timings_cpu, timings_time) if cpu_pvalue and time_pvalue: utest_results = { 'have_optimal_repetitions': have_optimal_repetitions, @@ -268,6 +296,25 @@ def get_difference_report( 'utest': utest_results }) + lhs_gmean = calculate_geomean(json1) + rhs_gmean = calculate_geomean(json2) + if lhs_gmean.any() and rhs_gmean.any(): + diff_report.append({ + 'name': 'OVERALL_GEOMEAN', + 'measurements': [{ + 'real_time': lhs_gmean[0], + 'cpu_time': lhs_gmean[1], + 'real_time_other': rhs_gmean[0], + 'cpu_time_other': rhs_gmean[1], + 'time': calculate_change(lhs_gmean[0], rhs_gmean[0]), + 'cpu': calculate_change(lhs_gmean[1], rhs_gmean[1]) + }], + 'time_unit': 's', + 'run_type': 'aggregate', + 'aggregate_name': 'geomean', + 'utest': {} + }) + return diff_report @@ -307,19 +354,19 @@ def print_difference_report( if not include_aggregates_only or not 'run_type' in benchmark or benchmark['run_type'] == 'aggregate': for measurement in benchmark['measurements']: output_strs += [color_format(use_color, - fmt_str, - BC_HEADER, - benchmark['name'], - first_col_width, - get_color(measurement['time']), - measurement['time'], - get_color(measurement['cpu']), - measurement['cpu'], - measurement['real_time'], - measurement['real_time_other'], - measurement['cpu_time'], - measurement['cpu_time_other'], - endc=BC_ENDC)] + fmt_str, + BC_HEADER, + benchmark['name'], + first_col_width, + get_color(measurement['time']), + measurement['time'], + get_color(measurement['cpu']), + measurement['cpu'], + measurement['real_time'], + measurement['real_time_other'], + measurement['cpu_time'], + measurement['cpu_time_other'], + endc=BC_ENDC)] # After processing the measurements, if requested and # if applicable (e.g. 
u-test exists for given benchmark), @@ -403,6 +450,7 @@ class TestReportDifference(unittest.TestCase): '-0.1000', '100', '110', '100', '90'], ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'], ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'], + ['OVERALL_GEOMEAN', '-0.8344', '-0.8026', '0', '0', '0', '0'] ] output_lines_with_header = print_difference_report( self.json_diff_report, use_color=False) @@ -489,6 +537,15 @@ class TestReportDifference(unittest.TestCase): 'time_unit': 's', 'utest': {} }, + { + 'name': 'OVERALL_GEOMEAN', + 'measurements': [{'real_time': 1.193776641714438e-06, 'cpu_time': 1.2144445585302297e-06, + 'real_time_other': 1.9768988699420897e-07, 'cpu_time_other': 2.397447755209533e-07, + 'time': -0.834399601997324, 'cpu': -0.8025889499549471}], + 'time_unit': 's', + 'run_type': 'aggregate', + 'aggregate_name': 'geomean', 'utest': {} + }, ] self.assertEqual(len(self.json_diff_report), len(expected_output)) for out, expected in zip( @@ -524,6 +581,7 @@ class TestReportDifferenceBetweenFamilies(unittest.TestCase): ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'], ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'], ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'], + ['OVERALL_GEOMEAN', '-0.5000', '-0.5000', '0', '0', '0', '0'] ] output_lines_with_header = print_difference_report( self.json_diff_report, use_color=False) @@ -561,6 +619,16 @@ class TestReportDifferenceBetweenFamilies(unittest.TestCase): 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 30, 'real_time_other': 15, 'cpu_time': 30, 'cpu_time_other': 15}], 'time_unit': 'ns', 'utest': {} + }, + { + 'name': 'OVERALL_GEOMEAN', + 'measurements': [{'real_time': 2.213363839400641e-08, 'cpu_time': 2.213363839400641e-08, + 'real_time_other': 1.1066819197003185e-08, 'cpu_time_other': 1.1066819197003185e-08, + 'time': -0.5000000000000009, 'cpu': -0.5000000000000009}], + 'time_unit': 's', + 'run_type': 'aggregate', + 'aggregate_name': 'geomean', + 'utest': {} } ] self.assertEqual(len(self.json_diff_report), len(expected_output)) @@ -599,8 +667,8 @@ class TestReportDifferenceWithUTest(unittest.TestCase): ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'], ['BM_Two_pvalue', - '0.6985', - '0.6985', + '1.0000', + '0.6667', 'U', 'Test,', 'Repetitions:', @@ -617,7 +685,7 @@ class TestReportDifferenceWithUTest(unittest.TestCase): ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], ['short_pvalue', '0.7671', - '0.1489', + '0.2000', 'U', 'Test,', 'Repetitions:', @@ -631,6 +699,7 @@ class TestReportDifferenceWithUTest(unittest.TestCase): 'repetitions', 'recommended.'], ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'], + ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0'] ] output_lines_with_header = print_difference_report( self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False) @@ -646,8 +715,8 @@ class TestReportDifferenceWithUTest(unittest.TestCase): expect_lines = [ ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], ['BM_Two_pvalue', - '0.6985', - '0.6985', + '1.0000', + '0.6667', 'U', 'Test,', 'Repetitions:', @@ -664,7 +733,7 @@ class TestReportDifferenceWithUTest(unittest.TestCase): ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], ['short_pvalue', '0.7671', - '0.1489', + '0.2000', 'U', 'Test,', 'Repetitions:', @@ -677,6 +746,7 @@ class TestReportDifferenceWithUTest(unittest.TestCase): '9+', 'repetitions', 'recommended.'], + ['OVERALL_GEOMEAN', '+1.6405', 
'-0.6985', '0', '0', '0', '0'] ] output_lines_with_header = print_difference_report( self.json_diff_report, include_aggregates_only=True, utest=True, utest_alpha=0.05, use_color=False) @@ -717,7 +787,7 @@ class TestReportDifferenceWithUTest(unittest.TestCase): ], 'time_unit': 'ns', 'utest': { - 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6985353583033387, 'time_pvalue': 0.6985353583033387 + 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0 } }, { @@ -738,7 +808,7 @@ class TestReportDifferenceWithUTest(unittest.TestCase): ], 'time_unit': 'ns', 'utest': { - 'have_optimal_repetitions': False, 'cpu_pvalue': 0.14891467317876572, 'time_pvalue': 0.7670968684102772 + 'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772 } }, { @@ -753,6 +823,16 @@ class TestReportDifferenceWithUTest(unittest.TestCase): ], 'time_unit': 'ns', 'utest': {} + }, + { + 'name': 'OVERALL_GEOMEAN', + 'measurements': [{'real_time': 8.48528137423858e-09, 'cpu_time': 8.441336246629233e-08, + 'real_time_other': 2.2405267593145244e-08, 'cpu_time_other': 2.5453661413660466e-08, + 'time': 1.6404861082353634, 'cpu': -0.6984640740519662}], + 'time_unit': 's', + 'run_type': 'aggregate', + 'aggregate_name': 'geomean', + 'utest': {} } ] self.assertEqual(len(self.json_diff_report), len(expected_output)) @@ -792,8 +872,8 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'], ['BM_Two_pvalue', - '0.6985', - '0.6985', + '1.0000', + '0.6667', 'U', 'Test,', 'Repetitions:', @@ -810,7 +890,7 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], ['short_pvalue', '0.7671', - '0.1489', + '0.2000', 'U', 'Test,', 'Repetitions:', @@ -823,7 +903,8 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( '9+', 'repetitions', 'recommended.'], - ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'] + ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'], + ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0'] ] output_lines_with_header = print_difference_report( self.json_diff_report, @@ -865,7 +946,7 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( ], 'time_unit': 'ns', 'utest': { - 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6985353583033387, 'time_pvalue': 0.6985353583033387 + 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0 } }, { @@ -886,7 +967,7 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( ], 'time_unit': 'ns', 'utest': { - 'have_optimal_repetitions': False, 'cpu_pvalue': 0.14891467317876572, 'time_pvalue': 0.7670968684102772 + 'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772 } }, { @@ -898,11 +979,83 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( 'real_time': 8, 'cpu_time_other': 53, 'cpu': -0.3375 - } + } ], 'utest': {}, 'time_unit': u'ns', 'aggregate_name': '' + }, + { + 'name': 'OVERALL_GEOMEAN', + 'measurements': [{'real_time': 8.48528137423858e-09, 'cpu_time': 8.441336246629233e-08, + 'real_time_other': 2.2405267593145244e-08, 'cpu_time_other': 2.5453661413660466e-08, + 'time': 1.6404861082353634, 'cpu': -0.6984640740519662}], + 'time_unit': 's', + 'run_type': 'aggregate', + 'aggregate_name': 'geomean', + 'utest': {} + } + ] + self.assertEqual(len(self.json_diff_report), len(expected_output)) + for 
out, expected in zip( + self.json_diff_report, expected_output): + self.assertEqual(out['name'], expected['name']) + self.assertEqual(out['time_unit'], expected['time_unit']) + assert_utest(self, out, expected) + assert_measurements(self, out, expected) + + +class TestReportDifferenceForPercentageAggregates( + unittest.TestCase): + @classmethod + def setUpClass(cls): + def load_results(): + import json + testInputs = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput1 = os.path.join(testInputs, 'test4_run0.json') + testOutput2 = os.path.join(testInputs, 'test4_run1.json') + with open(testOutput1, 'r') as f: + json1 = json.load(f) + with open(testOutput2, 'r') as f: + json2 = json.load(f) + return json1, json2 + + json1, json2 = load_results() + cls.json_diff_report = get_difference_report( + json1, json2, utest=True) + + def test_json_diff_report_pretty_printing(self): + expect_lines = [ + ['whocares', '-0.5000', '+0.5000', '0', '0', '0', '0'] + ] + output_lines_with_header = print_difference_report( + self.json_diff_report, + utest=True, utest_alpha=0.05, use_color=False) + output_lines = output_lines_with_header[2:] + print("\n") + print("\n".join(output_lines_with_header)) + self.assertEqual(len(output_lines), len(expect_lines)) + for i in range(0, len(output_lines)): + parts = [x for x in output_lines[i].split(' ') if x] + self.assertEqual(expect_lines[i], parts) + + def test_json_diff_report(self): + expected_output = [ + { + 'name': u'whocares', + 'measurements': [ + {'time': -0.5, + 'cpu': 0.5, + 'real_time': 0.01, + 'real_time_other': 0.005, + 'cpu_time': 0.10, + 'cpu_time_other': 0.15} + ], + 'time_unit': 'ns', + 'utest': {} } ] self.assertEqual(len(self.json_diff_report), len(expected_output)) diff --git a/third-party/update_benchmark.sh b/third-party/update_benchmark.sh new file mode 100755 index 000000000000..6d131d77ebf1 --- /dev/null +++ b/third-party/update_benchmark.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +echo "This script deletes `benchmark`, clones it from github, together" +echo "with its dependencies. It then removes .git* files and dirs." +echo "NOTE!!!" +echo "Please double-check the benchmark github wiki for any changes" +echo "to dependencies. Currently, these are limited to googletest." +echo +read -p "Press a key to continue, or Ctrl+C to cancel" + +rm -rf benchmark +git clone https://github.com/google/benchmark.git +rm -rf benchmark/.git* +
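
The most substantive tooling change above is the per-run geometric mean added to third-party/benchmark/tools/gbench/report.py. The following is a minimal, self-contained sketch of that logic, assuming only the pandas and scipy dependencies the patch itself imports; the helper mirrors get_timedelta_field_as_seconds()/calculate_geomean() from the diff, and the sample `run` dictionary is illustrative, not one of the checked-in test inputs.

# Sketch of the geomean computation added to tools/gbench/report.py.
from pandas import Timedelta
from scipy.stats import gmean


def time_field_as_seconds(benchmark, field_name):
    # Normalize e.g. {"real_time": 100, "time_unit": "ns"} to 1e-07 seconds,
    # so benchmarks reported in different time units can be aggregated.
    unit = benchmark['time_unit'] if 'time_unit' in benchmark else 's'
    return Timedelta(benchmark[field_name], unit) / Timedelta(1, 's')


run = {
    'benchmarks': [
        {'name': 'BM_A', 'real_time': 100, 'cpu_time': 110, 'time_unit': 'ns'},
        {'name': 'BM_B', 'real_time': 4, 'cpu_time': 5, 'time_unit': 'us'},
        # Aggregate rows (mean/median/stddev/cv) are skipped so that repeated
        # runs are not double-counted in the overall geomean.
        {'name': 'BM_A_mean', 'run_type': 'aggregate', 'real_time': 100,
         'cpu_time': 110, 'time_unit': 'ns'},
    ]
}

times = [[time_field_as_seconds(b, 'real_time'),
          time_field_as_seconds(b, 'cpu_time')]
         for b in run['benchmarks'] if b.get('run_type') != 'aggregate']
# gmean() reduces column-wise: [geomean(real_time), geomean(cpu_time)], in s.
print(gmean(times))

In the patch itself, the two per-run geomeans are then compared with calculate_change() and appended to the diff report as a synthetic 'OVERALL_GEOMEAN' aggregate row with time_unit 's', which is why each updated expectation list in report.py's tests gains a trailing OVERALL_GEOMEAN entry.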
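The expected values in the new TestReportDifferenceForPercentageAggregates case follow directly from the test4_run0.json/test4_run1.json inputs. A short sketch, assuming calculate_change() keeps its existing report.py semantics of relative change, (new - old) / |old|:

# Sketch of how the '-0.5000' / '+0.5000' columns for 'whocares' arise.
def calculate_change(old_val, new_val):
    # Assumed to match report.py's existing helper.
    return float(new_val - old_val) / abs(old_val)


# 'whocares' is an aggregate with aggregate_unit == "percentage", so its
# real_time/cpu_time values are compared directly between the two runs.
print(calculate_change(0.01, 0.005))  # time column:  -0.5
print(calculate_change(0.10, 0.15))   # cpu column:   +0.5

Because the 'whocares' entry has run_type 'aggregate', it is also excluded from the geomean calculation, which is why no OVERALL_GEOMEAN line appears in that test's expected output.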