canvas-lms/Jenkinsfile.selenium.flakey...

277 lines
8.5 KiB
Groovy

#!/usr/bin/env groovy
/*
* Copyright (C) 2019 - present Instructure, Inc.
*
* This file is part of Canvas.
*
* Canvas is free software: you can redistribute it and/or modify it under
* the terms of the GNU Affero General Public License as published by the Free
* Software Foundation, version 3 of the License.
*
* Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
* details.
*
* You should have received a copy of the GNU Affero General Public License along
* with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Load the shared build library at the refspec provided by the job environment.
// NOTE(review): helpers used later (configuration, protectedNode, libraryScript,
// cleanAndSetup, checkoutRepo) are not defined in this file and presumably come
// from this library -- confirm.
library "canvas-builds-library@${env.CANVAS_BUILDS_REFSPEC}"
// Script-level state shared between the helper functions and the pipeline block.
// partitions: list of test lists; computeTestCount() appends one list per node
// and the "Run Flakey Spec Catcher" stage fans them out.
@groovy.transform.Field
def partitions = []
// changed_tests: newline-separated test list produced by computeTestCount();
// also appended to the Slack message in sendSlack().
@groovy.transform.Field
def changed_tests = ''
// fsc_status: stays null unless executeFlakeySpecCatcher() records 'timeout'
// or 'aborted'; read by sendSlack() and cleanupFn().
@groovy.transform.Field
def fsc_status = null
// Value passed to the pipeline-level timeout() option; read from configuration
// under the key 'fsc-timeout', with 20 as the fallback.
def FSC_TIMEOUT = configuration.getInteger('fsc-timeout', 20)
/**
 * Tells whether the triggering Gerrit project is a plugin rather than
 * canvas-lms itself.
 *
 * @return the string "0" when GERRIT_PROJECT is "canvas-lms", otherwise "1"
 *         (strings, because the value is exported as the IS_PLUGIN env var)
 */
def isPlugin() {
  if (env.GERRIT_PROJECT == "canvas-lms") {
    return "0"
  }
  return "1"
}
/**
 * Resolves the working directory used inside the docker container.
 *
 * @return "/usr/src/app" for canvas-lms builds; for any other project, the
 *         project's directory under gems/plugins inside /usr/src/app
 */
def getDockerWorkDir() {
  if (env.GERRIT_PROJECT == "canvas-lms") {
    return "/usr/src/app"
  }
  return "/usr/src/app/gems/plugins/${env.GERRIT_PROJECT}"
}
/**
 * Resolves the working directory on the Jenkins node, relative to the
 * workspace root.
 *
 * @return "." for canvas-lms builds; for any other project, the project's
 *         directory under gems/plugins
 */
def getLocalWorkDir() {
  if (env.GERRIT_PROJECT == "canvas-lms") {
    return "."
  }
  return "gems/plugins/${env.GERRIT_PROJECT}"
}
/**
 * Prepares a node for a flakey-spec-catcher run: cleans the workspace, checks
 * out canvas-lms (plus the plugin under test, when IS_PLUGIN is "1"), then
 * pulls and builds/starts the docker-compose environment.
 */
def setupNode() {
  cleanAndSetup()
  sh 'rm -vrf ./tmp'

  // A canvas-lms patchset checks out its own refspec; a plugin patchset checks
  // out canvas-lms at CANVAS_LMS_REFSPEC and layers the plugin on top below.
  def canvasRefspec
  if (env.GERRIT_PROJECT == "canvas-lms") {
    canvasRefspec = env.GERRIT_REFSPEC
  } else {
    canvasRefspec = env.CANVAS_LMS_REFSPEC
  }
  checkoutRepo("canvas-lms", canvasRefspec, 2)

  if (env.IS_PLUGIN == "1") {
    dir(env.LOCAL_WORKDIR) {
      checkoutRepo(GERRIT_PROJECT, env.GERRIT_REFSPEC, 2)
    }
  }

  sh 'build/new-jenkins/docker-compose-pull.sh'
  sh 'build/new-jenkins/docker-compose-build-up.sh'
}
/**
 * Works out which tests the flakey spec catcher should run and splits them
 * into per-node partitions.
 *
 * Steps:
 *   1. Ask flakey_spec_catcher (dry run) for the changed spec files, or use
 *      spec/force_failure_spec.rb when FORCE_FAILURE is '1'.
 *   2. Expand those files into individual test locations with an rspec dry
 *      run (there is no direct way to just count tests).
 *   3. Walk the locations in order, closing a partition each time its
 *      accumulated weight reaches TESTS_PER_NODE. Locations containing
 *      'selenium' weigh SELENIUM_RATIO; everything else weighs 1.
 *
 * Results are stored in the script-level fields `changed_tests` and
 * `partitions`. When no tests are found, `partitions` is left untouched.
 */
def computeTestCount() {
  // Recreate $LOCAL_WORKDIR/tmp with mode 777; it is bind-mounted into the
  // containers below, which write their output files into it.
  sh 'rm -vrf $LOCAL_WORKDIR/tmp'
  sh 'mkdir -v $LOCAL_WORKDIR/tmp'
  sh 'chmod -vv 777 $LOCAL_WORKDIR/tmp'

  if (FORCE_FAILURE != '1') {
    // Shell text is kept flush-left so the command string itself is unchanged.
    sh '''
docker run --volume $(pwd)/$LOCAL_WORKDIR/.git:$DOCKER_WORKDIR/.git \
--volume $(pwd)/$LOCAL_WORKDIR/tmp:$DOCKER_WORKDIR/tmp \
--env FSC_IGNORE_FILES \
-w=$DOCKER_WORKDIR \
$PATCHSET_TAG \
bash -c "flakey_spec_catcher --use-parent --dry-run-quiet > $DOCKER_WORKDIR/tmp/test_list"
'''
    changed_tests = readFile("$env.LOCAL_WORKDIR/tmp/test_list").trim()
  } else {
    changed_tests = 'spec/force_failure_spec.rb'
  }

  echo "raw result from catcher: \n====\n$changed_tests\n===="
  if (changed_tests.isEmpty()) {
    echo "no tests found to execute"
    return
  }

  // Prefix every reported file with the in-container workdir and hand the
  // whole list to rspec as one space-separated argument string.
  def rspecTargets = changed_tests.split('\n').collect { specFile ->
    "$DOCKER_WORKDIR/$specFile"
  }.join(' ')

  sh """
docker-compose --project-name canvas-lms0 run --rm -v \$(pwd)/\$LOCAL_WORKDIR/tmp:\$DOCKER_WORKDIR/tmp \
canvas \
bash -c "bundle exec rspec --dry-run ${rspecTargets} --require ./build/new-jenkins/rspec_location.rb --format RSpecLocationFormatter --out ${env.DOCKER_WORKDIR}/tmp/test_list_split"
"""
  changed_tests = readFile("$env.LOCAL_WORKDIR/tmp/test_list_split").trim()

  echo "raw result from splitter: \n====\n$changed_tests\n===="
  if (changed_tests.isEmpty()) {
    echo "no tests found to execute"
    return
  }

  def testLocations = changed_tests.split('\n')
  def test_count = testLocations.size()

  // Selenium tests are assumed to cost SELENIUM_RATIO times a regular test.
  def locationWeights = testLocations.collect { location ->
    location.contains('selenium') ? env.SELENIUM_RATIO.toInteger() : 1
  }
  def weightPerNode = env.TESTS_PER_NODE.toInteger()

  def pending = []
  def pendingWeight = 0
  for (int idx = 0; idx < test_count; idx++) {
    pending.add(testLocations[idx])
    pendingWeight += locationWeights[idx]
    if (pendingWeight >= weightPerNode) {
      // This node's budget is used up; start filling the next one.
      partitions.add(pending)
      pending = []
      pendingWeight = 0
    }
  }
  // Whatever is left over forms a final, partially filled partition.
  if (!pending.isEmpty()) {
    partitions.add(pending)
  }

  echo "expected nodes to run on for $test_count tests: ${partitions.size()}"
}
/**
 * Runs the flakey spec catcher script once per docker process (indices
 * 0..DOCKER_PROCESSES-1, in parallel) and always collects and archives the
 * resulting fsc.out files afterwards.
 *
 * Interruption handling:
 *   - pipeline timeout: records fsc_status = 'timeout' and rethrows
 *   - user abort:       records fsc_status = 'aborted' and swallows the
 *                       exception
 *   - anything else:    rethrown unchanged
 *
 * @param prefix label for the invoking node; currently not used in the body
 */
def executeFlakeySpecCatcher(prefix = 'main') {
  sh 'rm -vrf tmp'
  try {
    sh 'seq 0 $((DOCKER_PROCESSES-1)) | parallel "build/new-jenkins/rspec-flakey-spec-catcher.sh {}"'
  } catch (org.jenkinsci.plugins.workflow.steps.FlowInterruptedException interruption) {
    def firstCause = interruption.causes[0]

    if (firstCause instanceof org.jenkinsci.plugins.workflow.steps.TimeoutStepExecution.ExceededTimeout) {
      echo "Failing the build due to timeouts"
      fsc_status = 'timeout'
      throw interruption
    }
    if (!(firstCause instanceof jenkins.model.CauseOfInterruption.UserInterruption)) {
      throw interruption
    }

    echo "Build aborted"
    fsc_status = 'aborted'
  } finally {
    // Runs on success, failure and interruption alike so output is never lost.
    sh "build/new-jenkins/docker-copy-files.sh /usr/src/app/tmp/fsc.out tmp canvas_ --allow-error --clean-dir"
    archiveArtifacts artifacts: "tmp/**/fsc.out", allowEmptyArchive: true
  }
}
/**
 * Posts the build outcome to the #flakey_spec_catcher_noisy Slack channel.
 *
 * The message links to the Gerrit change and the Jenkins build, followed by
 * the list of tests that were considered. A timeout takes precedence over
 * the success flag: it is coloured 'warning' and the message is prefixed
 * with a timeout notice.
 *
 * @param success truthy when the build succeeded
 */
def sendSlack(success) {
  def timedOut = fsc_status == 'timeout'

  def color
  if (timedOut) {
    color = 'warning'
  } else if (success) {
    color = 'good'
  } else {
    color = 'danger'
  }

  def jobInfo = "<https://gerrit.instructure.com/$env.GERRIT_CHANGE_NUMBER|Gerrit> | <$env.BUILD_URL|Jenkins>"
  def message = "$jobInfo\n$changed_tests"
  if (timedOut) {
    message = "Timeout Occurred!\n$message"
  }

  slackSend channel: '#flakey_spec_catcher_noisy', color: color, message: message
}
/**
 * Cleanup callback for the primary node: reports the result to Slack (unless
 * the build was aborted by a user) and then removes ./tmp and runs the shared
 * docker cleanup script. The cleanup runs even if the Slack call throws.
 *
 * @param status build status string; 'SUCCESS' is reported as a success and
 *               any other value as a failure
 */
def cleanupFn(status) {
  try {
    def userAborted = fsc_status == 'aborted'
    if (!userAborted) {
      def buildSucceeded = status == 'SUCCESS'
      sendSlack(buildSucceeded)
    }
  } finally {
    sh 'rm -vrf ./tmp/'
    libraryScript.execute 'bash/docker-cleanup.sh --allow-failure'
  }
}
// Flakey spec catcher pipeline.
//
// Everything runs inside a single declarative stage that hosts a scripted
// block on a 'canvas-docker' node:
//   1. Setup                      - check out sources, start docker-compose.
//   2. Compute Build Distribution - fill `partitions` with per-node test lists.
//   3. Run Flakey Spec Catcher    - partition 0 runs on the current node; each
//                                   further partition gets its own node; all
//                                   of them run in parallel.
// cleanupFn() is handed to protectedNode as the primary node's callback and
// receives the build status.
pipeline {
  agent none

  options {
    ansiColor('xterm')
    // No unit is given, so the timeout step's default unit applies.
    timeout(time: FSC_TIMEOUT)
    timestamps()
  }

  environment {
    GERRIT_PORT = '29418'
    GERRIT_URL = "$GERRIT_HOST:$GERRIT_PORT"
    BUILD_REGISTRY_FQDN = configuration.buildRegistryFQDN()
    COMPOSE_FILE = 'docker-compose.new-jenkins.yml:docker-compose.new-jenkins-selenium.yml:docker-compose.new-jenkins-flakey-spec-catcher.yml'
    IS_PLUGIN = isPlugin()
    DOCKER_WORKDIR = getDockerWorkDir()
    LOCAL_WORKDIR = getLocalWorkDir()
    FORCE_FAILURE = configuration.forceFailureFSC()
    // The flakey spec catcher errors when running specs from gems. Until we
    // figure out how to run them, ignore them.
    FSC_IGNORE_FILES = "gems/.*/spec/,spec/contracts/"
    POSTGRES_PASSWORD = 'sekret'
    SELENIUM_VERSION = "3.141.59-20201119"
    // Targeting 10 minutes / node; each node runs DOCKER_PROCESSES threads and
    // repeats each test FSC_REPEAT_FACTOR times.
    // Assumption: non-selenium tests take 500ms / test
    // Assumption: selenium tests take 500ms * SELENIUM_RATIO / test
    DOCKER_PROCESSES = 3
    FSC_REPEAT_FACTOR = 10
    MAX_NODES = configuration.getString('fsc-max-nodes', '10')
    SELENIUM_RATIO = 20
    TESTS_PER_NODE = 200
  }

  stages {
    stage('Environment') {
      steps {
        script {
          protectedNode('canvas-docker', { status -> cleanupFn(status) }) {
            stage('Setup') {
              setupNode()
            }

            stage("Compute Build Distribution") {
              computeTestCount()
            }

            stage("Run Flakey Spec Catcher") {
              if(partitions.size() == 0) {
                // Nothing to run; leave the stage early.
                return
              } else if(partitions.size() > env.MAX_NODES.toInteger()) {
                // One node per partition, so only refuse when the change needs
                // strictly more than MAX_NODES nodes, as the message states.
                // error() throws, so nothing after it in this branch would run.
                error "Refusing to use more than ${env.MAX_NODES} nodes to catch flakey specs. Consider breaking this change into smaller pieces."
              }

              def stages = [:]

              // The first partition reuses the node that is already set up.
              stages["flakey set 00"] = {
                withEnv(["FSC_TESTS=${partitions[0].join(',')}"]) {
                  executeFlakeySpecCatcher()
                }
              }

              for(int i = 1; i < partitions.size(); i++) {
                // make sure to create a new index variable so this value gets
                // captured by the lambda
                def index = i
                def node_number = (index).toString().padLeft(2, '0')
                stages["flakey set $node_number"] = {
                  protectedNode('canvas-docker') {
                    withEnv(["FSC_TESTS=${partitions[index].join(',')}"]) {
                      try {
                        setupNode()
                        executeFlakeySpecCatcher("node$node_number")
                      }
                      finally {
                        // Secondary nodes clean up after themselves; only the
                        // primary node goes through cleanupFn().
                        sh 'rm -vrf ./tmp'
                        libraryScript.execute 'bash/docker-cleanup.sh --allow-failure'
                      }
                    }
                  }
                }
              }

              parallel(stages)
            }
          }
        }
      }
    }
  }
}