canvas-lms/script/rspec-queue-with-retries

126 lines
4.3 KiB
Bash
Executable File

#!/bin/bash
set -e
export TEST_QUEUE_VERBOSE=0
export TEST_QUEUE_REPLACE_STATS=1
export TEST_QUEUE_NUM_LAZY_LOADERS=4
export ERROR_CONTEXT_BASE_PATH="`pwd`/log/spec_failures/Initial"
success_status=0
webdriver_crash_status=98
test_failure_status=99
max_failures=${RERUNS:=200}
rerun_number=0
runs_remaining=$((1+${RERUNS_RETRY:=3}))
spec_list=${SPEC_FILES:=spec}
rerun_line="adding spec to rerun (\./[^ ]+)"
exempt_from_rerun_threshold_line="exceptions are exempt from rerun thresholds"
failed_relevant_spec_line="not adding modified spec to rerun (\./.*)"
pipe=/tmp/rspec-queue-pipe
rm -f $pipe
mkfifo $pipe
# sometimes the log_and_run_command fifo gets clogged and the echo fails,
# so sleep and try again
function try_echo {
for i in {1..5}; do
if echo "$@" 2>&-; then
[[ $i -gt 1 ]] && echo "try_echo succeeded on try $i" # TODO: remove me once we're sure it's working well
return 0
fi
sleep 1
done
return 1
}
echo "[rspec-queue:initial] STARTING"
while true; do
last_status=0
new_spec_list=()
exempt_spec_list=()
failed_relevant_spec_list=()
command="bundle exec script/rspec-queue $@ --failure-exit-code 99 $spec_list"
echo "Running [$command]"
$command >$pipe 2>&1 &
command_pid=$!
while IFS= read line; do
try_echo "$line" || exit 2
if [[ $line =~ $rerun_line ]]; then
new_spec="${BASH_REMATCH[1]}"
new_spec_list+=("$new_spec")
if [[ $line =~ $exempt_from_rerun_threshold_line ]]; then
exempt_spec_list+=("$new_spec")
fi
elif [[ $line =~ $failed_relevant_spec_line ]]; then
failed_relevant_spec_list+=("${BASH_REMATCH[1]}")
fi
done <$pipe
wait $command_pid || last_status=$?
[[ ! $reruns_started ]] && echo "[rspec-queue:initial] FINISHED"
# between test-queue and the formatter we get double output; uniq them
new_spec_list=($(echo "${new_spec_list[@]}"|tr ' ' '\n'|sort -u|tr '\n' ' '))
failed_relevant_spec_list=($(echo "${failed_relevant_spec_list[@]}"|tr ' ' '\n'|sort -u|tr '\n' ' '))
failures_exempt_from_rerun_threshold=$(echo "${exempt_spec_list[@]}"|tr ' ' '\n'|grep .|sort -u|wc -l)
echo "last status: $last_status"
[[ $last_status == $success_status ]] && break
if [[ $last_status != $success_status && $last_status != $webdriver_crash_status && $last_status != $test_failure_status ]]; then
echo "unexpected exit code $last_status! perhaps the code is horribly broken :("
break
fi
if [[ $last_status == $webdriver_crash_status ]]; then
echo "a webdriver worker crashed; retrigger your build"
break
fi
if [[ $last_status == $test_failure_status ]]; then
rerun_number=$((rerun_number+1))
runs_remaining=$((runs_remaining-1))
num_failures=${#new_spec_list[@]}
[[ $runs_remaining == 0 ]] && { echo "reruns failed $num_failures failure(s)"; break; }
export ERROR_CONTEXT_BASE_PATH="`pwd`/log/spec_failures/Rerun $rerun_number"
failures_towards_rerun_threshold=$((num_failures-failures_exempt_from_rerun_threshold))
if [[ failures_towards_rerun_threshold -gt $max_failures ]]; then
echo "too many failures (got: $num_failures, exempt from threshold: $failures_exempt_from_rerun_threshold, max: $max_failures), build not eligible for reruns" || :
break
fi
failed_relevant_spec_list_length=${#failed_relevant_spec_list[@]}
if [[ "$DONT_RERUN_RELEVANT_SPECS" == "1" && failed_relevant_spec_list_length -gt 0 ]]; then
echo "specs relevant to this commit failed: ${failed_relevant_spec_list[@]}, build not eligible for reruns" || :
break
fi
if [[ $num_failures == 0 ]]; then
echo "nothing to re-run! perhaps the code is horribly broken? :("
break
fi
spec_list="${new_spec_list[@]}"
echo -e "failed, re-trying $num_failures failure(s) ($failures_towards_rerun_threshold against threshold), $runs_remaining attempt(s) left\n\n\n"
if [[ ! $reruns_started ]]; then
reruns_started=1
echo "[rspec-queue:reruns] STARTING"
unset TEST_QUEUE_REPLACE_STATS # ensure the rerun stats merge into the main ones
# not many files, and we want to maximize cores for actual workers on this node (since we won't relay)
export TEST_QUEUE_NUM_LAZY_LOADERS=1
fi
fi
done
[[ $reruns_started ]] && echo "[rspec-queue:reruns] FINISHED"
echo "rspec-queue-with-retries exiting with $last_status"
exit $last_status