canvas-lms/Jenkinsfile.selenium.flakey...

278 lines
8.6 KiB
Groovy

#!/usr/bin/env groovy
/*
* Copyright (C) 2019 - present Instructure, Inc.
*
* This file is part of Canvas.
*
* Canvas is free software: you can redistribute it and/or modify it under
* the terms of the GNU Affero General Public License as published by the Free
* Software Foundation, version 3 of the License.
*
* Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
* details.
*
* You should have received a copy of the GNU Affero General Public License along
* with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Load canvas-builds-library at the refspec given by CANVAS_BUILDS_REFSPEC.
library "canvas-builds-library@${env.CANVAS_BUILDS_REFSPEC}"
// NOTE(review): loadLocalLibrary is not defined in this file; presumably it is
// provided by the library loaded on the previous line, so the order matters.
loadLocalLibrary('local-lib', 'build/new-jenkins/library')
// List of partitions, each itself a list of test paths. Filled in by
// computeTestCount() and read by the 'Run Flakey Spec Catcher' stage.
@groovy.transform.Field
def partitions = []
// Newline-separated list of tests to run. Assigned in computeTestCount() and
// included in the Slack message built by sendSlack().
@groovy.transform.Field
def changedTests = ''
// Outcome marker set by executeFlakeySpecCatcher(): stays null unless the run
// is interrupted, in which case it becomes 'timeout' or 'aborted'.
@groovy.transform.Field
def fscStatus = null
// Value passed to the pipeline-level timeout(time: ...) option. Taken from the
// 'fsc-timeout' commit message flag coerced to Integer; falls back to 20 when
// that coercion yields a falsy value. No unit is given where it is used, so
// the timeout step's default unit applies.
final static FSC_TIMEOUT = commitMessageFlag('fsc-timeout') as Integer ?: 20
// Tells whether the Gerrit project under test is something other than
// canvas-lms itself. The answer is returned as a string flag: '0' when
// GERRIT_PROJECT is 'canvas-lms', '1' for every other value.
def isPlugin() {
  if (env.GERRIT_PROJECT == 'canvas-lms') {
    return '0'
  }
  return '1'
}
// Resolves the in-container directory that corresponds to the Gerrit project
// being built:
//   canvas-lms          -> /usr/src/app
//   qti_migration_tool  -> /usr/src/app/vendor/<project>
//   anything else       -> /usr/src/app/gems/plugins/<project>
def getDockerWorkDir() {
  def project = env.GERRIT_PROJECT
  if (project == 'canvas-lms') {
    return '/usr/src/app'
  }
  if (project == 'qti_migration_tool') {
    return "/usr/src/app/vendor/${project}"
  }
  return "/usr/src/app/gems/plugins/${project}"
}
// Resolves the workspace-relative directory that corresponds to the Gerrit
// project being built:
//   canvas-lms          -> .
//   qti_migration_tool  -> vendor/<project>
//   anything else       -> gems/plugins/<project>
def getLocalWorkDir() {
  def project = env.GERRIT_PROJECT
  if (project == 'canvas-lms') {
    return '.'
  }
  if (project == 'qti_migration_tool') {
    return "vendor/${project}"
  }
  return "gems/plugins/${project}"
}
// Upper bound on the number of partitions the pipeline is willing to run.
// Read from the 'fsc-max-nodes' commit message flag as an Integer; when that
// value is falsy (absent or 0 under Groovy truth) the limit defaults to 10.
def getMaxNodes() {
  def requestedNodes = commitMessageFlag('fsc-max-nodes').asType(Integer)
  return requestedNodes ?: 10
}
// Prepares the current node for a flakey-spec-catcher run: clears ./tmp,
// checks out canvas-lms (plus the plugin repository when IS_PLUGIN is '1'),
// pulls the docker images and starts the containers.
def setupNode() {
  sh 'rm -vrf ./tmp'

  // canvas-lms is checked out at the patchset refspec when it is the project
  // under test, otherwise at CANVAS_LMS_REFSPEC.
  def canvasRefspec
  if (env.GERRIT_PROJECT == 'canvas-lms') {
    canvasRefspec = env.GERRIT_REFSPEC
  } else {
    canvasRefspec = env.CANVAS_LMS_REFSPEC
  }
  checkoutFromGit(gerritProjectUrl('canvas-lms'), refspec: canvasRefspec, depth: 2)

  if (env.IS_PLUGIN == '1') {
    // The plugin itself is checked out inside its directory within canvas-lms.
    dir(env.LOCAL_WORKDIR) {
      checkoutFromGit(gerritProjectUrl(), refspec: env.GERRIT_REFSPEC, depth: 2)
    }
  }

  // Only the image pull is wrapped in the Starlord credentials.
  credentials.withStarlordCredentials { ->
    sh(script: 'build/new-jenkins/docker-compose-pull.sh', label: 'Pull Images')
  }

  sh(script: 'build/new-jenkins/docker-compose-build-up.sh', label: 'Start Containers')
}
// Determines which tests to run and splits them into weighted partitions.
//
// Side effects: recreates $LOCAL_WORKDIR/tmp, assigns the script field
// changedTests, and appends one list of test paths per partition to the
// script field partitions. When no tests are found, partitions is left
// untouched and the function returns early.
def computeTestCount() {
  // There is no direct way to ask the catcher for a count, so the test list is
  // written to a scratch file and counted from there.
  sh 'rm -vrf $LOCAL_WORKDIR/tmp'
  sh 'mkdir -v $LOCAL_WORKDIR/tmp'
  sh 'chmod -vv 777 $LOCAL_WORKDIR/tmp'

  if (FORCE_FAILURE != '1') {
    // The script text is kept flush-left so the string handed to sh is unchanged.
    sh '''
docker run --volume $(pwd)/$LOCAL_WORKDIR/.git:$DOCKER_WORKDIR/.git \
--volume $(pwd)/$LOCAL_WORKDIR/tmp:$DOCKER_WORKDIR/tmp \
--env FSC_IGNORE_FILES \
-w=$DOCKER_WORKDIR \
$PATCHSET_TAG \
bash -c "git config --global --add safe.directory $DOCKER_WORKDIR; flakey_spec_catcher --use-parent --list-child-specs --dry-run-quiet > $DOCKER_WORKDIR/tmp/test_list"
'''
    changedTests = readFile("$env.LOCAL_WORKDIR/tmp/test_list").trim()
  } else {
    // Forced-failure mode skips the catcher and targets a single known spec.
    changedTests = 'spec/force_failure_spec.rb'
  }

  echo "raw result from catcher: \n====\n$changedTests\n===="

  if (changedTests.length() == 0) {
    echo 'no tests found to execute'
    return
  }

  // One entry per line of the list, each prefixed with the container workdir.
  def testPaths = changedTests.split('\n').collect { specPath ->
    "$DOCKER_WORKDIR/$specPath"
  }
  def testCount = testPaths.size()

  // A path containing 'selenium' counts SELENIUM_RATIO times; others count once.
  def weights = testPaths.collect { testPath ->
    if (testPath.contains('selenium')) {
      return env.SELENIUM_RATIO.toInteger()
    }
    return 1
  }
  def weightLimit = env.TESTS_PER_NODE.toInteger()

  // Fill a bucket in list order and close it as soon as its accumulated
  // weight reaches the per-node limit.
  def bucket = []
  def bucketWeight = 0
  for (int idx = 0; idx < testCount; idx++) {
    bucket.add(testPaths[idx])
    bucketWeight += weights[idx]
    if (bucketWeight < weightLimit) {
      continue
    }
    partitions.add(bucket)
    bucket = []
    bucketWeight = 0
  }

  // Whatever is left over forms a final, lighter partition.
  if (bucket) {
    partitions.add(bucket)
  }

  echo "expected nodes to run on for $testCount tests: ${partitions.size()}"
}
// Runs the flakey spec catcher script on the current node and always tries to
// archive its output afterwards.
//
// Interruption handling, based on the first cause of the FlowInterruptedException:
//   - timeout exceeded: fscStatus becomes 'timeout' and the exception is rethrown
//   - user interruption: fscStatus becomes 'aborted' and the exception is not rethrown
//   - any other cause: the exception is rethrown with fscStatus unchanged
def executeFlakeySpecCatcher() {
  try {
    sh 'build/new-jenkins/rspec-flakey-spec-catcher.sh'
  } catch (org.jenkinsci.plugins.workflow.steps.FlowInterruptedException e) {
    def firstCause = e.causes[0]
    if (firstCause instanceof jenkins.model.CauseOfInterruption.UserInterruption) {
      echo 'Build aborted'
      fscStatus = 'aborted'
    } else {
      if (firstCause instanceof org.jenkinsci.plugins.workflow.steps.TimeoutStepExecution.ExceededTimeout) {
        echo 'Failing the build due to timeouts'
        fscStatus = 'timeout'
      }
      throw e
    }
  } finally {
    // Copying the output is best effort: a missing file or container must not
    // fail the step, hence the '|| true' and allowEmptyArchive.
    sh """
rm -vrf tmp && mkdir -p tmp
docker cp flakey-spec-catcher_canvas_1:/usr/src/app/tmp/fsc.out tmp/fsc-${env.NODE_NUMBER}.out || true
"""
    archiveArtifacts artifacts: "tmp/fsc-${env.NODE_NUMBER}.out", allowEmptyArchive: true
  }
}
// Posts the outcome of the run to the #flakey_spec_catcher_noisy Slack channel.
//
// success: truthy when the build succeeded. It selects the 'good' or 'danger'
//          color, unless fscStatus is 'timeout', which always uses 'warning'
//          and prefixes the message with a timeout notice.
// The message contains links to the Gerrit change and the Jenkins build,
// followed by the contents of changedTests.
def sendSlack(success) {
  def timedOut = fscStatus == 'timeout'

  def color
  if (timedOut) {
    color = 'warning'
  } else if (success) {
    color = 'good'
  } else {
    color = 'danger'
  }

  def jobInfo = "<https://gerrit.instructure.com/$env.GERRIT_CHANGE_NUMBER|Gerrit> | <$env.BUILD_URL|Jenkins>"
  def message = "$jobInfo\n$changedTests"
  if (timedOut) {
    message = "Timeout Occurred!\n$message"
  }

  slackSend(channel: '#flakey_spec_catcher_noisy', color: color, message: message)
}
// Post-build hook body: reports the result to Slack unless the run was marked
// as aborted by executeFlakeySpecCatcher().
//
// status: stage status value; only the exact value 'SUCCESS' is reported as a
//         success.
def cleanupFn(status) {
  if (fscStatus == 'aborted') {
    return
  }
  sendSlack(status == 'SUCCESS')
}
// Declarative pipeline for the flakey spec catcher job.
//
// The single 'Environment' stage sets up one node, computes the list of tests
// and their partitions, then runs every partition in parallel. The whole
// pipeline is bounded by FSC_TIMEOUT, and cleanupFn() is invoked through the
// onStageEnded hook of the 'Builder' stage.
pipeline {
  agent none
  options {
    ansiColor('xterm')
    timeout(time: FSC_TIMEOUT)
    timestamps()
  }
  environment {
    GERRIT_PORT = '29418'
    GERRIT_URL = "$GERRIT_HOST:$GERRIT_PORT"
    BUILD_REGISTRY_FQDN = configuration.buildRegistryFQDN()
    COMPOSE_FILE = 'docker-compose.new-jenkins.yml:docker-compose.new-jenkins-selenium.yml:docker-compose.new-jenkins-flakey-spec-catcher.yml'
    IS_PLUGIN = isPlugin()
    DOCKER_WORKDIR = getDockerWorkDir()
    LOCAL_WORKDIR = getLocalWorkDir()
    FORCE_FAILURE = commitMessageFlag('force-failure-fsc').asBooleanInteger()
    // fsc errors when running specs from gems.
    // until we figure out how to run them, we should ignore them
    FSC_IGNORE_FILES = 'gems/.*/spec/,spec/contracts/,spec/selenium/performance/'
    POSTGRES_PASSWORD = 'sekret'
    // Targeting 10 minutes / node, each node runs RSPEC_PROCESSES threads and
    // repeats each test FSC_REPEAT_FACTOR times.
    // Assumption: non-selenium tests take 500ms / test
    // Assumption: selenium tests take 500ms * SELENIUM_RATIO / test
    FSC_REPEAT_FACTOR = 10
    MAX_NODES = getMaxNodes()
    RSPEC_PROCESSES = 3
    SELENIUM_RATIO = 20
    TESTS_PER_NODE = 200
  }
  stages {
    stage('Environment') {
      steps {
        script {
          // NOTE(review): extendedStage, nodeLabel and protectedNode are not
          // defined in this file; presumably they come from the loaded libraries.
          extendedStage('Runner').nodeRequirements(label: nodeLabel()).obeysAllowStages(false).execute {
            // Report the outcome once the 'Builder' stage has ended.
            def postBuildHandler = [
              onStageEnded: { stageName, stageConfig, result ->
                cleanupFn(stageConfig.status())
              }
            ]
            extendedStage('Builder').hooks(postBuildHandler).obeysAllowStages(false).execute {
              stage('Setup') {
                setupNode()
              }
              stage('Compute Build Distribution') {
                computeTestCount()
              }
              stage('Run Flakey Spec Catcher') {
                if (partitions.size() == 0) {
                  // Nothing to run: computeTestCount() found no tests.
                  return
                } else if (partitions.size() > env.MAX_NODES.toInteger()) {
                  // error() aborts the stage by throwing, so no explicit
                  // return is needed after it.
                  error "Refusing to use more than ${env.MAX_NODES} nodes to catch flakey specs. Consider breaking this change into smaller pieces."
                }
                def stages = [:]
                // The first partition runs right here: no protectedNode wrapper
                // and no second setupNode() call, since the 'Setup' stage above
                // already prepared this node.
                stages['flakey set 00'] = {
                  withEnv([
                    "FSC_TESTS=${partitions[0].join(',')}",
                    'NODE_NUMBER=00'
                  ]) {
                    executeFlakeySpecCatcher()
                  }
                }
                // Every further partition runs inside protectedNode and sets
                // itself up before running the catcher.
                for (int i = 1; i < partitions.size(); i++) {
                  // make sure to create a new index variable so this value gets
                  // captured by the lambda
                  def index = i
                  def nodeNumber = (index).toString().padLeft(2, '0')
                  stages["flakey set $nodeNumber"] = {
                    protectedNode(nodeLabel()) {
                      withEnv([
                        "FSC_TESTS=${partitions[index].join(',')}",
                        "NODE_NUMBER=$nodeNumber"
                      ]) {
                        setupNode()
                        executeFlakeySpecCatcher()
                      }
                    }
                  }
                }
                parallel(stages)
              }
            }
          }
        }
      }
    }
  }
}