canvas-lms/Jenkinsfile.selenium.flakey...

253 lines
8.4 KiB
Plaintext
Raw Normal View History

#!/usr/bin/env groovy
/*
* Copyright (C) 2019 - present Instructure, Inc.
*
* This file is part of Canvas.
*
* Canvas is free software: you can redistribute it and/or modify it under
* the terms of the GNU Affero General Public License as published by the Free
* Software Foundation, version 3 of the License.
*
* Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
* details.
*
* You should have received a copy of the GNU Affero General Public License along
* with this program. If not, see <http://www.gnu.org/licenses/>.
*/
library "canvas-builds-library"
// Script-level state shared between the helper functions and the pipeline
// body below. @Field makes each variable visible inside the script's methods.

// Number of specs the catcher is expected to run. Stays at -1 until
// computeTestCount() has stored a real value.
@groovy.transform.Field
def result_test_count = -1
// Number of nodes to spread the run over. Stays at -1 until
// computeDistributedCount() has stored a real value.
@groovy.transform.Field
def result_node_count = -1
// Trimmed contents of tmp/test_list as written by the dry run in
// computeTestCount(); also appended to the Slack message in sendSlack().
@groovy.transform.Field
def changed_tests = ''
// Set to true by executeFlakeySpecCatcher() when it decides not to fail the
// build because of a timeout; sendSlack() then prefixes the message.
@groovy.transform.Field
def fsc_timeout = false
/**
 * Flags whether the Gerrit project under test is a plugin.
 *
 * @return the string "0" when GERRIT_PROJECT is "canvas-lms", otherwise "1"
 */
def isPlugin() {
  if (env.GERRIT_PROJECT == "canvas-lms") {
    return "0"
  }
  return "1"
}
/**
 * Working directory used inside the docker container.
 *
 * @return "/usr/src/app" for canvas-lms itself, otherwise the directory of
 *         the project under /usr/src/app/gems/plugins
 */
def getDockerWorkDir() {
  def isCanvas = (env.GERRIT_PROJECT == "canvas-lms")
  if (isCanvas) {
    return "/usr/src/app"
  }
  return "/usr/src/app/gems/plugins/${env.GERRIT_PROJECT}"
}
/**
 * Working directory relative to the Jenkins workspace.
 *
 * @return "." for canvas-lms itself, otherwise the directory of the project
 *         under gems/plugins
 */
def getLocalWorkDir() {
  def isCanvas = (env.GERRIT_PROJECT == "canvas-lms")
  if (isCanvas) {
    return "."
  }
  return "gems/plugins/${env.GERRIT_PROJECT}"
}
/**
 * Runs flakey_spec_catcher in dry-run mode inside the patchset image and
 * records how many specs it lists.
 *
 * Side effects: recreates $LOCAL_WORKDIR/tmp, stores the trimmed listing in
 * changed_tests and the number of listed lines in result_test_count.
 * For plugin builds the plugin repository is checked out first.
 */
def computeTestCount() {
  if (env.IS_PLUGIN == "1") {
    // plugin builds: fetch the plugin change into its directory
    dir(env.LOCAL_WORKDIR) {
      def pluginRemote = [
        credentialsId: '44aa91d6-ab24-498a-b2b4-911bcb17cc35',
        name: 'origin',
        refspec: "$env.GERRIT_REFSPEC",
        url: "ssh://$GERRIT_URL/${GERRIT_PROJECT}.git"
      ]
      checkout([
        $class: 'GitSCM',
        branches: [[name: 'FETCH_HEAD']],
        doGenerateSubmoduleConfigurations: false,
        extensions: [],
        submoduleCfg: [],
        userRemoteConfigs: [pluginRemote]
      ])
    }
  }

  // There is no dedicated "count" mode, so the dry-run listing is written to
  // a world-writable tmp directory that is mounted into the container.
  sh 'rm -vrf $LOCAL_WORKDIR/tmp'
  sh 'mkdir -v $LOCAL_WORKDIR/tmp'
  sh 'chmod -vv 777 $LOCAL_WORKDIR/tmp'
  // NOTE: the script text below is kept flush-left on purpose; it is a
  // runtime string.
  sh '''
docker run --volume $(pwd)/$LOCAL_WORKDIR/.git:$DOCKER_WORKDIR/.git \
--volume $(pwd)/$LOCAL_WORKDIR/tmp:$DOCKER_WORKDIR/tmp \
--env FSC_IGNORE_FILES \
-w=$DOCKER_WORKDIR \
$PATCHSET_TAG \
bash -c "flakey_spec_catcher --use-parent --dry-run-quiet > $DOCKER_WORKDIR/tmp/test_list"
'''

  changed_tests = readFile("$env.LOCAL_WORKDIR/tmp/test_list").trim()
  echo "raw result from catcher: \n====\n$changed_tests\n===="

  // one listed spec per line; an empty listing means nothing to run
  def listedSpecs = changed_tests ? changed_tests.split('\n').length : 0
  echo "expected tests to run: $listedSpecs"
  result_test_count = listedSpecs
}
/**
 * Decides how many nodes the flakey spec catcher run is spread over and
 * stores the answer in result_node_count.
 *
 * Reads result_test_count (set by computeTestCount()) and the environment
 * values DISTRIBUTED_CUT_OFF, DISTRIBUTED_OFFSET, DISTRIBUTED_FACTOR and
 * IS_PLUGIN.
 *
 * @throws IllegalStateException if computeTestCount() has not run yet, or if
 *         more than DISTRIBUTED_CUT_OFF tests were detected
 */
def computeDistributedCount() {
  if (result_test_count < 0) {
    // `new` is required: without it Groovy looks for a method called
    // IllegalStateException and fails with MissingMethodException instead.
    throw new IllegalStateException("call computeTestCount() first")
  }
  // this type of distributed thing always needs a hard cutoff
  if (env.DISTRIBUTED_CUT_OFF.toInteger() < result_test_count) {
    throw new IllegalStateException("unable to process more than ${env.DISTRIBUTED_CUT_OFF} tests".toString())
  }

  if (result_test_count == 0) {
    // nothing to run, so no nodes are needed
    result_node_count = 0
  }
  else if (env.IS_PLUGIN == "1") {
    // lets not deal with distributing fsc for plugins unless we have to
    result_node_count = 1
  }
  else {
    // Integer division that rounds down, but never below one node while
    // there is at least one test. With DISTRIBUTED_OFFSET=5 and
    // DISTRIBUTED_FACTOR=10 this gives:
    //   tests | nodes
    //   1-14  | 1
    //   15-24 | 2
    //   25-34 | 3
    //   ...
    def distributed_offset = env.DISTRIBUTED_OFFSET.toInteger()
    def distributed_factor = env.DISTRIBUTED_FACTOR.toInteger()
    def rounded_down = (int) ((result_test_count + distributed_offset) / distributed_factor)
    // without the clamp, 1-4 tests would be reported as 0 nodes
    result_node_count = Math.max(1, rounded_down)
  }
}
/**
 * Brings up the docker-compose environment and runs the flakey spec catcher,
 * then archives its output.
 *
 * The whole run is wrapped in timeout(30). An interruption caused by that
 * timeout is not treated as a build failure: fsc_timeout is set to true and
 * execution continues. Any other interruption (for example a manual abort)
 * is rethrown.
 *
 * @param prefix sub-directory of ./tmp that fsc.out is copied into and
 *               archived from; defaults to 'main'
 */
def executeFlakeySpecCatcher(prefix = 'main') {
  sh 'rm -vrf tmp'
  try {
    timeout(30) {
      sh 'build/new-jenkins/docker-compose-pull.sh'
      sh 'build/new-jenkins/docker-compose-pull-selenium.sh'
      sh 'build/new-jenkins/docker-compose-build-up.sh'
      sh 'build/new-jenkins/docker-compose-setup-databases.sh'
      sh 'build/new-jenkins/rspec-flakey-spec-catcher.sh'
    }
  }
  // Don't fail the build for timeouts
  catch (org.jenkinsci.plugins.workflow.steps.FlowInterruptedException e) {
    // An interruption may carry no causes at all; treat that as "not a timeout".
    def firstCause = e.causes ? e.causes[0] : null
    // The parentheses matter: `!x instanceof T` is parsed as
    // `(!x) instanceof T`, which is always false and would swallow every
    // interruption, not just timeouts.
    if (!(firstCause instanceof org.jenkinsci.plugins.workflow.steps.TimeoutStepExecution.ExceededTimeout)) {
      throw e
    } else {
      echo "Not failing the build due to timeouts"
      fsc_timeout = true
    }
  }
  finally {
    // Always try to collect fsc.out, whatever happened above.
    sh "mkdir -vp tmp/$prefix"
    // returnStatus: the exit code is returned instead of failing the step,
    // so a missing container or file does not break the build here.
    sh(
      script: "docker cp \$(docker ps -q --filter 'name=canvas_'):/usr/src/app/tmp/fsc.out ./tmp/$prefix/fsc.out",
      returnStatus: true
    )
    archiveArtifacts(artifacts: "tmp/$prefix/fsc.out", allowEmptyArchive: true)
  }
}
/**
 * Posts the outcome of the run to the #flakey_spec_catcher_noisy channel.
 *
 * The message links to the Gerrit change and the Jenkins build, followed by
 * the contents of changed_tests; when fsc_timeout is set it is prefixed with
 * a timeout notice.
 *
 * @param success truthy for a "good" colored message, falsy for "danger"
 */
def sendSlack(success) {
  def links = "<https://gerrit.instructure.com/$env.GERRIT_CHANGE_NUMBER|Gerrit> | <$env.BUILD_URL|Jenkins>"
  def text = "$links\n$changed_tests"
  if (fsc_timeout) {
    text = "Timeout Occurred!\n$text"
  }

  def barColor
  if (success) {
    barColor = "good"
  } else {
    barColor = "danger"
  }
  slackSend channel: '#flakey_spec_catcher_noisy', color: barColor, message: text
}
/**
 * Cleanup callback handed to protectedNode by the pipeline below: reports
 * to Slack, then removes ./tmp and runs the docker cleanup script. The
 * cleanup steps run even if the Slack notification throws.
 *
 * @param status build status string; only 'SUCCESS' is reported as a success
 */
def cleanupFn(status) {
  def succeeded = (status == 'SUCCESS')
  try {
    sendSlack(succeeded)
  }
  finally {
    sh 'rm -vrf ./tmp/'
    execute 'bash/docker-cleanup.sh --allow-failure'
  }
}
// Declarative pipeline: computes how many specs a patchset touches, then runs
// the flakey spec catcher either on the current node or fanned out over
// several nodes in parallel.
pipeline {
// No pipeline-level agent; nodes are acquired through protectedNode below.
agent none
options {
ansiColor('xterm')
timestamps()
}
environment {
GERRIT_PORT = '29418'
GERRIT_URL = "$GERRIT_HOST:$GERRIT_PORT"
BUILD_REGISTRY_FQDN = configuration.buildRegistryFQDN()
COMPOSE_FILE = 'docker-compose.new-jenkins.yml:docker-compose.new-jenkins-selenium.yml:docker-compose.new-jenkins-flakey-spec-catcher.yml'
// The next three values come from the helper functions defined earlier in
// this file and depend on GERRIT_PROJECT.
IS_PLUGIN = isPlugin()
DOCKER_WORKDIR = getDockerWorkDir()
LOCAL_WORKDIR = getLocalWorkDir()
// When this is '1' the test-count computation is skipped and the catcher is
// run unconditionally (see the stages below).
FORCE_FAILURE = configuration.forceFailureFSC()
// there will be a node for every 10 tests
DISTRIBUTED_FACTOR = '10'
// this causes distribution to trigger at an offset
DISTRIBUTED_OFFSET = '5'
// if someone is trying to update more than this many tests, then just fail.
// that will be at DISTRIBUTED_CUT_OFF / DISTRIBUTED_FACTOR nodes.
DISTRIBUTED_CUT_OFF = '300'
// fsc errors when running specs from gems.
// until we figure out how to run them, we should ignore them
FSC_IGNORE_FILES = "gems/.*/spec/"
POSTGRES_PASSWORD = 'sekret'
SELENIUM_VERSION = "3.141.59-20200719"
}
stages {
stage('Environment') {
steps {
script {
// cleanupFn is passed as a callback taking the build status; it sends the
// Slack notification and cleans up this node.
protectedNode('canvas-docker', { status -> cleanupFn(status) }) {
stage('Setup') {
cleanAndSetup()
checkout scm
}
// Skip counting tests when a failure is being forced.
if(FORCE_FAILURE != '1') {
stage("Compute Build Distribution") {
computeTestCount()
computeDistributedCount()
echo "expected nodes to run on for $result_test_count tests: $result_node_count"
}
}
// Only run the catcher when there is something to run, or when forced.
if(result_test_count > 0 || FORCE_FAILURE == '1') {
stage("Run Flakey Spec Catcher") {
if (FORCE_FAILURE == '1') {
echo "running force failure"
executeFlakeySpecCatcher()
}
else if (result_node_count <= 1) {
echo "running on this node"
executeFlakeySpecCatcher()
}
else {
echo "running on multiple nodes: $result_node_count"
// Map of branch name -> closure, handed to parallel() below.
def nodes = [:];
for(int i = 0; i < result_node_count; i++) {
// make sure to create a new index variable so this value gets
// captured by the lambda for FSC_NODE_INDEX
def index = i
// zero-padded to two digits, used in the branch name and artifact prefix
def node_number = (index).toString().padLeft(2, '0')
nodes["flakey set $node_number"] = {
protectedNode('canvas-docker') {
// FSC_NODE_TOTAL / FSC_NODE_INDEX are exported to the steps run on this node.
withEnv(["FSC_NODE_TOTAL=$result_node_count", "FSC_NODE_INDEX=$index"]) {
try {
cleanAndSetup()
sh 'rm -vrf ./tmp'
checkout scm
executeFlakeySpecCatcher("node$node_number")
}
finally {
// each extra node cleans up after itself, whatever the outcome
sh 'rm -vrf ./tmp'
execute 'bash/docker-cleanup.sh --allow-failure'
}
}
}
}
}
parallel(nodes)
}
}
}
}
}
}
}
}
}