2021-11-24 12:48:50 +08:00
|
|
|
#!/bin/bash
|
2021-12-24 02:14:27 +08:00
|
|
|
##===- bolt/utils/bughunter.sh - Help locate BOLT bugs -------*- Script -*-===##
|
2021-11-24 12:48:50 +08:00
|
|
|
#
|
2021-12-24 02:14:27 +08:00
|
|
|
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
# See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
# details.
|
2021-11-24 12:48:50 +08:00
|
|
|
#
|
2021-12-24 02:14:27 +08:00
|
|
|
##===----------------------------------------------------------------------===##
|
2021-11-24 12:48:50 +08:00
|
|
|
#
|
|
|
|
# This script attempts to narrow down llvm-bolt bug to a single function in the
|
|
|
|
# input binary.
|
|
|
|
#
|
|
|
|
# If such a function is found, llvm-bolt could be run just on this function
|
|
|
|
# to simplify the debugging process.
|
|
|
|
#
|
|
|
|
# The following envvars are used by this script:
|
|
|
|
#
|
|
|
|
# BOLT - path to llvm-bolt
|
|
|
|
#
|
|
|
|
# BOLT_OPTIONS - options to be used by llvm-bolt
|
|
|
|
#
|
|
|
|
# INPUT_BINARY - input for llvm-bolt
|
|
|
|
#
|
|
|
|
# PRE_COMMAND - command to execute prior to running optimized binary
|
|
|
|
#
|
|
|
|
# POST_COMMAND - command to filter results of running optimized binary
|
|
|
|
#
|
|
|
|
# TIMEOUT_OR_CMD - optional timeout or command on optimized binary command
|
|
|
|
# if the value is a number with an optional trailing letter
|
|
|
|
# [smhd] it is considered a parameter to "timeout",
|
|
|
|
# otherwise it's a shell command that wraps the optimized
|
|
|
|
# binary command.
|
|
|
|
#
|
|
|
|
# COMMAND_LINE - command line options to run optimized binary with
|
|
|
|
#
|
|
|
|
# IGNORE_ERROR - ignore error codes returned from optimized binary
|
|
|
|
#
|
|
|
|
# GOLD_FILE - file containing expected output from optimized binary
|
|
|
|
#
|
|
|
|
# FUNC_NAMES - if set, path to an initial list of function names to
|
|
|
|
# search. Otherwise, nm is used on the original binary.
|
|
|
|
#
|
|
|
|
# OFFLINE - if set, bughunter will produce the binaries but will not
|
|
|
|
# run them, and will depend on you telling whether it
|
|
|
|
# succeeded or not.
|
|
|
|
#
|
|
|
|
# MAX_FUNCS - if set, use -max-funcs to narrow down the offending
|
|
|
|
# function. if non-zero, start -max-funcs at $MAX_FUNCS
|
|
|
|
# otherwise, count the number of symbols in the binary.
|
|
|
|
#
|
|
|
|
# MAX_FUNCS_FLAG - BOLT command line option to use for MAX_FUNCS search.
|
|
|
|
# Default is -max-funcs. Can also be used for relocation
|
|
|
|
# debugging, e.g. -max-data-relocations.
|
|
|
|
#
|
|
|
|
# VERBOSE - if non-empty, set the script to echo mode.
|
|
|
|
#
|
2021-12-24 02:14:27 +08:00
|
|
|
##===----------------------------------------------------------------------===##
|
|
|
|
|
2021-11-24 12:48:50 +08:00
|
|
|
# Use the caller's BOLT if provided, otherwise rely on llvm-bolt from PATH.
BOLT=${BOLT:-llvm-bolt}

# Set the core file size limit to zero for this script and its children.
ulimit -c 0
# A pipeline reports failure if any of its stages fails, not only the last.
set -o pipefail

# VERBOSE: any non-empty value switches on command tracing.
if [[ -n "$VERBOSE" ]]; then
  set -x
fi
|
|
|
|
|
|
|
|
# INPUT_BINARY must name a regular file that we may execute. The -f test is
# needed because -x alone is also true for a searchable directory.
if [[ ! -f "$INPUT_BINARY" || ! -x "$INPUT_BINARY" ]]; then
  echo "INPUT_BINARY must be set to an executable file"
  exit 1
fi
|
|
|
|
|
|
|
|
# PRE_COMMAND runs before each test invocation; default to the ":" no-op.
PRE_COMMAND=${PRE_COMMAND:-":"}

# POST_COMMAND filters the test output; default to passing it through as is.
POST_COMMAND=${POST_COMMAND:-cat}

# A bare number with an optional s/m/h/d suffix is shorthand for a hard
# timeout. Anything else is kept verbatim as a wrapper command. (The pattern
# requires at least one digit, so an empty value never matches.)
if [[ "$TIMEOUT_OR_CMD" =~ ^[0-9]+[smhd]?$ ]]; then
  TIMEOUT_OR_CMD="timeout -s KILL $TIMEOUT_OR_CMD"
fi

# BOLT option driven by the MAX_FUNCS search.
MAX_FUNCS_FLAG=${MAX_FUNCS_FLAG:-"-max-funcs"}
|
|
|
|
|
|
|
|
# Scratch file names. "mktemp -u" only reserves a name: the optimized binary
# and the log are created later by BOLT and by shell redirection.
# The template is quoted so an input path containing spaces stays one word.
OPTIMIZED_BINARY=$(mktemp -t -u --suffix=.bolt "$(basename -- "$INPUT_BINARY").XXX")
# Captured output of the binary under test.
OUTPUT_FILE="${OPTIMIZED_BINARY}.out"
# Combined stdout/stderr of each llvm-bolt run.
BOLT_LOG=$(mktemp -t -u --suffix=.log boltXXX)
|
|
|
|
|
|
|
|
# Baseline check (skipped when OFFLINE is set): run the unmodified input
# binary through the PRE_COMMAND / TIMEOUT_OR_CMD / POST_COMMAND harness and
# report whether it passes. A failure here is only reported as a warning.
if [[ -z "$OFFLINE" ]]; then
  echo "Verify input binary passes"
  echo " INPUT_BINARY: $PRE_COMMAND && $TIMEOUT_OR_CMD $INPUT_BINARY $COMMAND_LINE |& $POST_COMMAND >& $OUTPUT_FILE"
  # PRE_COMMAND, TIMEOUT_OR_CMD, COMMAND_LINE and POST_COMMAND hold whole
  # command lines and are left unquoted on purpose so they are word-split.
  # File paths are quoted so names containing spaces stay intact.
  ($PRE_COMMAND && $TIMEOUT_OR_CMD "$INPUT_BINARY" $COMMAND_LINE |& $POST_COMMAND >& "$OUTPUT_FILE")
  STATUS=$?
  # IGNORE_ERROR=1 discards the exit status of the run.
  if [[ "$IGNORE_ERROR" == "1" ]]; then
    FAIL=0
  else
    FAIL=$STATUS
  fi
  # When GOLD_FILE exists, comparing the captured output against it replaces
  # the verdict computed above.
  if [[ -e "$GOLD_FILE" ]]; then
    cmp -s "$OUTPUT_FILE" "$GOLD_FILE"
    FAIL=$?
  fi
  if [[ "$FAIL" -ne 0 ]]; then
    echo " Warning: input binary failed"
  else
    echo " Input binary passes."
  fi
fi
|
|
|
|
|
|
|
|
# Confirm the premise of the hunt: optimizing the whole binary with
# BOLT_OPTIONS should yield a binary that fails (or make BOLT itself fail).
echo "Verify optimized binary fails"
# BOLT and BOLT_OPTIONS are word-split on purpose; file paths are quoted.
($BOLT $BOLT_OPTIONS "$INPUT_BINARY" -o "$OPTIMIZED_BINARY" >& "$BOLT_LOG")
FAIL=$?
if [[ "$FAIL" -eq 0 ]]; then
  if [[ -z "$OFFLINE" ]]; then
    echo " OPTIMIZED_BINARY: $PRE_COMMAND && $TIMEOUT_OR_CMD $OPTIMIZED_BINARY $COMMAND_LINE |& $POST_COMMAND >& $OUTPUT_FILE"
    # The harness variables hold whole command lines, hence unquoted.
    ($PRE_COMMAND && $TIMEOUT_OR_CMD "$OPTIMIZED_BINARY" $COMMAND_LINE |& $POST_COMMAND >& "$OUTPUT_FILE")
    STATUS=$?
    # IGNORE_ERROR=1 discards the exit status of the run.
    if [[ "$IGNORE_ERROR" == "1" ]]; then
      FAIL=0
    else
      FAIL=$STATUS
    fi
    # An existing GOLD_FILE replaces the verdict with an output comparison.
    if [[ -e "$GOLD_FILE" ]]; then
      cmp -s "$OUTPUT_FILE" "$GOLD_FILE"
      FAIL=$?
    fi
  else
    # OFFLINE: the operator runs the binary and types the outcome.
    echo "Did it pass? Type the return code [0 = pass, 1 = fail]"
    read -r -n1 PASS
    # The typed answer becomes the verdict; only a literal 0 is a pass.
    if [[ "$PASS" == "0" ]]; then
      FAIL=0
    else
      FAIL=1
    fi
  fi
  if [[ "$FAIL" -eq 0 ]]; then
    echo " Warning: optimized binary passes."
  else
    echo " Optimized binary fails as expected."
  fi
else
  echo " Bolt crashes while generating optimized binary."
fi
|
|
|
|
|
|
|
|
# Collect function names
# The candidate list is every defined symbol of type T, t, W or w reported
# by nm, minus names containing "._" and empty names, sorted and de-duplicated.
# mktemp creates the list file itself, so there is no window between picking
# the name and writing to it.
FF=$(mktemp -t --suffix=.txt func-names.XXX)
nm --defined-only -p "$INPUT_BINARY" \
  | grep " [TtWw] " \
  | cut -d ' ' -f 3 \
  | grep -Ev '\._|^$' \
  | sort -u > "$FF"
|
|
|
|
|
|
|
|
# Use function names or numbers
if [[ -z "$MAX_FUNCS" ]]; then
  # Do binary search on function names
  # A caller-supplied list replaces the one collected with nm.
  if [[ -n "$FUNC_NAMES" ]]; then
    FF=$FUNC_NAMES
  fi
  # Reading through a redirect makes wc print only the count; the arithmetic
  # expansion also tolerates padding and turns a failed read into 0.
  NUM_FUNCS=$(( $(wc -l < "$FF") ))
  if (( NUM_FUNCS > 0 )); then
    # Split the list into a first half (FF0, rounded up) and a second half
    # (FF1); the search starts with the first half.
    HALF=$(( (NUM_FUNCS + 1) / 2 ))
    PREFIX=$(mktemp -t -u --suffix=.txt func-names.XXX)
    FF0=${PREFIX}aa
    FF1=${PREFIX}ab
    split -a 2 -l "$HALF" "$FF" "$PREFIX"
    FF=$FF0
    NUM_FUNCS=$(( $(wc -l < "$FF") ))
  fi
  # An empty list leaves nothing to search: CONTINUE=0 skips the loop.
  CONTINUE=$(( NUM_FUNCS > 0 ))
else
  # Search on the numeric limit passed via MAX_FUNCS_FLAG, between P and Q.
  P=0
  if [[ "$MAX_FUNCS" -eq 0 ]]; then
    # MAX_FUNCS=0 means "start from the number of collected symbols".
    Q=$(( $(wc -l < "$FF") ))
  else
    Q=$MAX_FUNCS
  fi
  I=$Q
  # Keep going while more than one candidate remains between P and Q.
  CONTINUE=$(( (Q - P) > 1 ))
fi
|
|
|
|
|
|
|
|
# Bisection loop. Each pass rebuilds the optimized binary with BOLT restricted
# by SEARCH_OPT, decides whether that build fails, and narrows the candidates:
#   - name mode (MAX_FUNCS empty): FF is the list file under test; FF0 and FF1
#     are the two halves produced by the most recent split.
#   - count mode (MAX_FUNCS set): P is moved up to I after a pass, Q is moved
#     down to I after a failure, and I is their midpoint.
ITER=0
while [[ "$CONTINUE" -ne 0 ]]; do
  rm -f -- "$OPTIMIZED_BINARY"
  if [[ -z "$MAX_FUNCS" ]]; then
    echo "Iteration $ITER, trying $FF / $HALF functions"
    SEARCH_OPT="-funcs-file-no-regex=$FF"
  else
    I=$(( (Q + P) / 2 ))
    echo "Iteration $ITER, P=$P, Q=$Q, I=$I"
    SEARCH_OPT="$MAX_FUNCS_FLAG=$I"
  fi
  echo " BOLT: $BOLT $BOLT_OPTIONS $INPUT_BINARY $SEARCH_OPT -o $OPTIMIZED_BINARY >& $BOLT_LOG"
  # BOLT, BOLT_OPTIONS and SEARCH_OPT are word-split on purpose (they may
  # carry several options); file paths are quoted.
  ($BOLT $BOLT_OPTIONS "$INPUT_BINARY" $SEARCH_OPT -o "$OPTIMIZED_BINARY" >& "$BOLT_LOG")
  FAIL=$?
  echo " BOLT failure=$FAIL"
  rm -f -- "$OUTPUT_FILE"
  if [[ "$FAIL" -eq 0 ]]; then
    if [[ -z "$OFFLINE" ]]; then
      echo " OPTIMIZED_BINARY: $PRE_COMMAND && $TIMEOUT_OR_CMD $OPTIMIZED_BINARY $COMMAND_LINE |& $POST_COMMAND >& $OUTPUT_FILE"
      # The harness variables hold whole command lines, hence unquoted.
      ($PRE_COMMAND && $TIMEOUT_OR_CMD "$OPTIMIZED_BINARY" $COMMAND_LINE |& $POST_COMMAND >& "$OUTPUT_FILE")
      STATUS=$?
      # IGNORE_ERROR=1 discards the exit status of the run.
      if [[ "$IGNORE_ERROR" == "1" ]]; then
        FAIL=0
      else
        FAIL=$STATUS
      fi
      # An existing GOLD_FILE replaces the verdict with an output comparison.
      if [[ -e "$GOLD_FILE" ]]; then
        cmp -s "$OUTPUT_FILE" "$GOLD_FILE"
        FAIL=$?
      fi
      echo " OPTIMIZED_BINARY failure=$FAIL"
    else
      # OFFLINE: the operator runs the binary and types the outcome.
      echo "Did it pass? Type the return code [0 = pass, 1 = fail]"
      read -r -n1 PASS
      # The typed answer becomes the verdict; only a literal 0 is a pass.
      if [[ "$PASS" == "0" ]]; then
        FAIL=0
      else
        FAIL=1
      fi
    fi
  else
    # Any BOLT error is treated as a failure of this candidate set.
    FAIL=1
  fi

  if [[ -z "$MAX_FUNCS" ]]; then
    if [[ "$FAIL" -eq 0 ]]; then
      # The list under test is clean. If it was already the second half,
      # both halves passed and there is nothing left to blame.
      if [[ "$FF" == "$FF1" ]]; then
        NUM_FUNCS=0
        break
      fi
      # Otherwise move on to the second half; a missing file counts as empty.
      FF=$FF1
      NUM_FUNCS=0
      if [[ -e "$FF" ]]; then
        NUM_FUNCS=$(( $(wc -l < "$FF") ))
      fi
    else
      # The list under test fails: split it again and retry its first half.
      HALF=$(( (NUM_FUNCS + 1) / 2 ))
      PREFIX=$(mktemp -t -u --suffix=.txt func-names.XXX)
      split -a 2 -l "$HALF" "$FF" "$PREFIX"
      FF0=${PREFIX}aa
      FF1=${PREFIX}ab
      FF=$FF0
      NUM_FUNCS=$(( $(wc -l < "$FF") ))
      # A single remaining name with no second half cannot be split further.
      if [[ "$NUM_FUNCS" -eq 1 && ! -e "$FF1" ]]; then
        break
      fi
    fi
    CONTINUE=$(( NUM_FUNCS > 0 ))
  else
    if [[ "$FAIL" -eq 0 ]]; then
      P=$I
    else
      Q=$I
    fi
    FF=$I
    HALF=$I
    CONTINUE=$(( (Q - P) > 1 ))
  fi
  ITER=$(( ITER + 1 ))
done
|
|
|
|
|
|
|
|
# Report the outcome of the search.
# Start from a known-empty verdict so a FAILED variable inherited from the
# caller's environment cannot fake a positive result.
FAILED=
if [[ -z "$MAX_FUNCS" ]]; then
  # Name mode: a non-zero count means FF still holds suspect functions.
  if [[ "$NUM_FUNCS" -ne 0 ]]; then
    FAILED="The function(s) that failed are in $FF"
  fi
elif [[ "$P" -ne "$Q" ]]; then
  # Count mode: take the last "processing ending on" item from the BOLT log.
  # NOTE(review): the log belongs to the final BOLT run (limit I), which is
  # not necessarily the failing limit Q - confirm this is what is wanted.
  FF=$(grep "processing ending" "$BOLT_LOG" | sed -e "s/BOLT-INFO: processing ending on \(.*\)/\1/g" | tail -1)
  FAILED="The item that failed is $FF @ $Q"
fi

if [[ -n "$FAILED" ]]; then
  echo "$FAILED"
  echo "To reproduce, run: $BOLT $BOLT_OPTIONS $INPUT_BINARY $SEARCH_OPT -o $OPTIMIZED_BINARY"
else
  echo "Unable to reproduce bug."
fi
|
|
|
|
|
|
|
|
# Remove the scratch files. Some of them may never have been created (the
# output file is not written in OFFLINE mode, and the optimized binary is
# missing when BOLT fails), so -f keeps the cleanup quiet and successful.
rm -f -- "$OPTIMIZED_BINARY" "$OUTPUT_FILE" "$BOLT_LOG"
|