// monitor.go
//
// This source file is part of the FoundationDB open source project
//
// Copyright 2021-2024 Apple Inc. and the FoundationDB project authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

package main

import (
	"bufio"
	"context"
	"crypto/tls"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/http/pprof"
	"os"
	"os/exec"
	"os/signal"
	"path"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/apple/foundationdb/fdbkubernetesmonitor/api"
	"github.com/apple/foundationdb/fdbkubernetesmonitor/internal/certloader"
	"github.com/fsnotify/fsnotify"
	"github.com/go-logr/logr"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/collectors"
	"github.com/prometheus/client_golang/prometheus/promhttp"
	"k8s.io/apimachinery/pkg/api/equality"
	"k8s.io/utils/pointer"
)

const (
	// maxErrorBackoffSeconds is the maximum time to wait after a process fails before starting another process.
	// The actual delay will be based on the observed errors and will increase until maxErrorBackoffSeconds is hit.
	maxErrorBackoffSeconds = 60 * time.Second

	// fdbClusterFilePath defines the default path to the fdb cluster file that contains the current connection string.
	// This file is managed by the fdbserver processes themselves, and they will automatically update it if the
	// coordinators have changed.
	fdbClusterFilePath = "/var/fdb/data/fdb.cluster"
)

// monitor provides the main monitor loop.
type monitor struct {
	// configFile defines the path to the config file to load.
	configFile string

	// currentContainerVersion defines the version of the container. This will be the same as the fdbserver version.
	currentContainerVersion api.Version

	// customEnvironment defines the custom environment variables to use when
	// interpreting the monitor configuration.
	customEnvironment map[string]string

	// activeConfiguration defines the active process configuration.
	activeConfiguration *api.ProcessConfiguration

	// activeConfigurationBytes defines the source data for the active process
	// configuration.
	activeConfigurationBytes []byte

	// lastConfigurationTime is the last time we successfully reloaded the
	// configuration file.
	lastConfigurationTime time.Time

	// processCount defines how many processes the monitor should keep running.
	processCount int

	// processIDs stores the PIDs of the processes that are running. A PID of
	// zero will indicate that a process does not have a run loop. A PID of -1
	// will indicate that a process has a run loop but is not currently running
	// the subprocess.
	processIDs []int

	// mutex defines a mutex around working with configuration.
	// This is used to synchronize access to local state like the active
	// configuration and the process IDs from multiple goroutines.
	mutex sync.Mutex

	// podClient is a client for posting updates about this pod to
	// Kubernetes.
	podClient *kubernetesClient

	// logger is the logger instance for this monitor.
	logger logr.Logger

	// metrics represents the prometheus monitor metrics.
	metrics *metrics
}
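
// httpConfig holds the settings for the monitor's HTTP endpoint, which serves the
// Prometheus metrics and, when enabled, the pprof debug handlers, either over TLS
// or plain HTTP (see startMonitor below).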
type httpConfig struct {
	listenAddr, certPath, keyPath, rootCaPath string
}

// startMonitor starts the monitor loop.
func startMonitor(ctx context.Context, logger logr.Logger, configFile string, customEnvironment map[string]string, processCount int, promConfig httpConfig, enableDebug bool, currentContainerVersion api.Version, enableNodeWatcher bool) {
	client, err := createPodClient(ctx, logger, enableNodeWatcher, setupCache)
	if err != nil {
		logger.Error(err, "could not create Pod client")
		os.Exit(1)
	}

	mon := &monitor{
		configFile:              configFile,
		podClient:               client,
		logger:                  logger,
		customEnvironment:       customEnvironment,
		processCount:            processCount,
		processIDs:              make([]int, processCount+1),
		currentContainerVersion: currentContainerVersion,
	}

	go func() { mon.watchPodTimestamps() }()

	mux := http.NewServeMux()
	// Enable pprof endpoints for debugging purposes.
	if enableDebug {
		mux.Handle("/debug/pprof/heap", pprof.Handler("heap"))
		mux.Handle("/debug/pprof/goroutine", pprof.Handler("goroutine"))
		mux.Handle("/debug/pprof/threadcreate", pprof.Handler("threadcreate"))
		mux.Handle("/debug/pprof/allocs", pprof.Handler("allocs"))
		mux.Handle("/debug/pprof/block", pprof.Handler("block"))
		mux.Handle("/debug/pprof/mutex", pprof.Handler("mutex"))
		mux.HandleFunc("/debug/pprof/", pprof.Index)
		mux.HandleFunc("/debug/pprof/profile", pprof.Profile)
		mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
		mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
		mux.HandleFunc("/debug/pprof/trace", pprof.Trace)
	}

	reg := prometheus.NewRegistry()
	// Enable the default go metrics.
	reg.MustRegister(collectors.NewGoCollector())
	monitorMetrics := registerMetrics(reg)
	mon.metrics = monitorMetrics
	promHandler := promhttp.HandlerFor(reg, promhttp.HandlerOpts{})

	// Add Prometheus support.
	mux.Handle("/metrics", promHandler)
	go func() {
		// Serve over TLS when a certificate and key were provided, otherwise fall
		// back to plain HTTP.
		if promConfig.keyPath != "" || promConfig.certPath != "" {
			certLoader := certloader.NewCertLoader(logger, promConfig.certPath, promConfig.keyPath)
			tlsConfig := &tls.Config{
				GetCertificate: certLoader.GetCertificate,
			}
			server := &http.Server{
				Addr:      promConfig.listenAddr,
				Handler:   mux,
				TLSConfig: tlsConfig,
			}
			err := server.ListenAndServeTLS("", "")
			if err != nil {
				logger.Error(err, "could not start HTTPS server")
				os.Exit(1)
			}

			// Return explicitly so the goroutine never falls through and starts a
			// second, plaintext listener on the same address.
			return
		}

		err := http.ListenAndServe(promConfig.listenAddr, mux)
		if err != nil {
			logger.Error(err, "could not start HTTP server")
			os.Exit(1)
		}
	}()

	mon.run()
}
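
// As a usage sketch: if the monitor was started with listenAddr set to ":8081"
// (an illustrative value; the real address comes from the command-line
// configuration) and without TLS material, the registry above can be scraped with:
//
//	curl http://localhost:8081/metrics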

// updateCustomEnvironmentFromNodeMetadata adds the node labels and their values to the custom environment map. Every
// generated environment variable starts with NODE_LABEL, and "/" and "." in the label key are replaced with "_", e.g.
// the label "foundationdb.org/testing = awesome" generates the env variable NODE_LABEL_FOUNDATIONDB_ORG_TESTING = awesome.
func (monitor *monitor) updateCustomEnvironmentFromNodeMetadata() {
	if monitor.podClient.nodeMetadata == nil {
		return
	}

	nodeLabels := monitor.podClient.nodeMetadata.Labels
	for key, value := range nodeLabels {
		sanitizedKey := strings.ReplaceAll(key, "/", "_")
		sanitizedKey = strings.ReplaceAll(sanitizedKey, ".", "_")
		envKey := "NODE_LABEL_" + strings.ToUpper(sanitizedKey)
		currentValue, ok := monitor.customEnvironment[envKey]
		if !ok {
			monitor.logger.Info("adding new custom environment variable from node labels", "key", envKey, "value", value)
			monitor.customEnvironment[envKey] = value
			continue
		}

		if currentValue == value {
			continue
		}

		monitor.logger.Info("update custom environment variable from node labels", "key", envKey, "newValue", value, "currentValue", currentValue)
		monitor.customEnvironment[envKey] = value
	}
}

// readConfiguration reads the latest configuration from the monitor file.
func (monitor *monitor) readConfiguration() (*api.ProcessConfiguration, []byte) {
	file, err := os.Open(monitor.configFile)
	if err != nil {
		monitor.logger.Error(err, "Error reading monitor config file", "monitorConfigPath", monitor.configFile)
		return nil, nil
	}
	defer func() {
		err := file.Close()
		if err != nil {
			monitor.logger.Error(err, "could not close monitor config file", "monitorConfigPath", monitor.configFile)
		}
	}()
	configuration := &api.ProcessConfiguration{}
	configurationBytes, err := io.ReadAll(file)
	if err != nil {
		monitor.logger.Error(err, "Error reading monitor configuration", "monitorConfigPath", monitor.configFile)
		return nil, nil
	}
	err = json.Unmarshal(configurationBytes, configuration)
	if err != nil {
		monitor.logger.Error(err, "Error parsing monitor configuration", "rawConfiguration", string(configurationBytes))
		return nil, nil
	}

	if configuration.Version == nil {
		monitor.logger.Error(nil, "could not parse configured version", "rawConfiguration", string(configurationBytes))
		return nil, nil
	}

	// If the versions are protocol compatible don't try to point to another binary path. Otherwise, the processes
	// cannot restart when a process crashes during a patch upgrade.
	if monitor.currentContainerVersion.IsProtocolCompatible(*configuration.Version) {
		configuration.BinaryPath = fdbserverPath
	} else {
		configuration.BinaryPath = path.Join(sharedBinaryDir, configuration.Version.String(), "fdbserver")
	}

	err = checkOwnerExecutable(configuration.BinaryPath)
	if err != nil {
		monitor.logger.Error(err, "Error with binary path for latest configuration", "configuration", configuration, "binaryPath", configuration.BinaryPath)
		return nil, nil
	}

	monitor.updateCustomEnvironmentFromNodeMetadata()
	_, err = configuration.GenerateArguments(1, monitor.customEnvironment)
	if err != nil {
		monitor.logger.Error(err, "Error generating arguments for latest configuration", "configuration", configuration, "binaryPath", configuration.BinaryPath)
		return nil, nil
	}

	// If the process group is isolated we don't want to start the servers, and we should terminate any running
	// fdbserver instances.
	if configuration.ShouldRunServers() && monitor.processIsIsolated() {
		configuration.RunServers = pointer.Bool(false)
	}

	return configuration, configurationBytes
}
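
// An illustrative monitor configuration file, of the shape consumed by
// readConfiguration. The authoritative schema (field names, argument types) is
// defined by api.ProcessConfiguration; the version string and flags below are
// placeholder values, not a recommendation:
//
//	{
//		"version": "7.1.26",
//		"arguments": [
//			{"value": "--cluster_file"},
//			{"value": "/var/fdb/data/fdb.cluster"}
//		]
//	}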

// loadConfiguration loads the latest configuration from the config file.
func (monitor *monitor) loadConfiguration() {
	configuration, configurationBytes := monitor.readConfiguration()
	if configuration == nil || len(configurationBytes) == 0 {
		return
	}

	monitor.acceptConfiguration(configuration, configurationBytes)
}

// checkOwnerExecutable validates that a path is a file that exists and is
// executable by its owner.
func checkOwnerExecutable(path string) error {
	binaryStat, err := os.Stat(path)
	if err != nil {
		return err
	}
	// 0o100 is the owner-execute permission bit.
	if binaryStat.Mode()&0o100 == 0 {
		return fmt.Errorf("binary is not executable")
	}
	return nil
}

// acceptConfiguration is called when the monitor process parses and accepts
// a configuration from the local config file.
func (monitor *monitor) acceptConfiguration(configuration *api.ProcessConfiguration, configurationBytes []byte) {
	monitor.mutex.Lock()
	defer monitor.mutex.Unlock()

	// If the configuration hasn't changed ignore those events to prevent noisy logging.
	if equality.Semantic.DeepEqual(monitor.activeConfiguration, configuration) {
		return
	}

	monitor.logger.Info("Received new configuration file", "configuration", configuration)
	monitor.activeConfiguration = configuration
	monitor.activeConfigurationBytes = configurationBytes
	monitor.lastConfigurationTime = time.Now()
	// Update the prometheus metrics.
	monitor.metrics.registerConfigurationChange(configuration.Version.String())

	var hasRunningProcesses bool
	for processNumber := 1; processNumber <= monitor.processCount; processNumber++ {
		if monitor.processIDs[processNumber] == 0 {
			// A PID of zero means no run loop exists for this process number yet:
			// mark it as having a run loop (-1) and launch one.
			monitor.processIDs[processNumber] = -1
			// Copy the loop variable so the goroutine doesn't capture the shared
			// iteration variable.
			tempNumber := processNumber
			go func() { monitor.runProcess(tempNumber) }()
			continue
		}

		hasRunningProcesses = true
	}

	// If the monitor has running processes but the processes shouldn't be running, kill them with SIGTERM.
	if hasRunningProcesses && !monitor.activeConfiguration.ShouldRunServers() {
		monitor.sendSignalToProcesses(syscall.SIGTERM)
	}

	err := monitor.podClient.updateAnnotations(monitor)
	if err != nil {
		monitor.logger.Error(err, "Error updating pod annotations")
	}
}

// getBackoffDuration returns the backoff duration. The backoff time grows quadratically with the number of
// successive errors, capped at maxErrorBackoffSeconds.
func getBackoffDuration(errorCounter int) time.Duration {
	timeToBackoff := time.Duration(errorCounter*errorCounter) * time.Second
	if timeToBackoff > maxErrorBackoffSeconds {
		return maxErrorBackoffSeconds
	}

	return timeToBackoff
}
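
// For example, successive failures back off as 0s, 1s, 4s, 9s, 16s, 25s, 36s, 49s;
// from the eighth error onward (8*8 = 64s) the delay is capped at the 60s maximum.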

// runProcess runs a loop to continually start and watch a process.
func (monitor *monitor) runProcess(processNumber int) {
	pid := 0
	logger := monitor.logger.WithValues("processNumber", processNumber, "area", "runProcess")
	logger.Info("Starting run loop")
	startTime := time.Now()
	// Counts the successive errors that occurred during process startup. The backoff time is calculated based on
	// this error count.
	var errorCounter int

	for {
		if !monitor.processRequired(processNumber) {
			return
		}

		durationSinceLastStart := time.Since(startTime)
		// If no errors have occurred for more than 5 minutes, reset the error counter so the backoff time starts over.
		if durationSinceLastStart > 5*time.Minute {
			errorCounter = 0
		}

		arguments, err := monitor.activeConfiguration.GenerateArguments(processNumber, monitor.customEnvironment)
		if err != nil {
			backoffDuration := getBackoffDuration(errorCounter)
			logger.Error(err, "Error generating arguments for subprocess", "configuration", monitor.activeConfiguration, "errorCounter", errorCounter, "backoffDuration", backoffDuration.String())
			time.Sleep(backoffDuration)
			errorCounter++
			continue
		}
		cmd := exec.Cmd{
			Path: arguments[0],
			Args: arguments,
		}

		logger.Info("Starting subprocess", "arguments", arguments)

		stdout, err := cmd.StdoutPipe()
		if err != nil {
			logger.Error(err, "Error getting stdout from subprocess")
		}

		stderr, err := cmd.StderrPipe()
		if err != nil {
			logger.Error(err, "Error getting stderr from subprocess")
		}

		err = cmd.Start()
		if err != nil {
			backoffDuration := getBackoffDuration(errorCounter)
			logger.Error(err, "Error starting subprocess", "backoffDuration", backoffDuration.String())
			time.Sleep(backoffDuration)
			errorCounter++
			continue
		}

		// Update the prometheus metrics for the process.
		monitor.metrics.registerProcessStartup(processNumber, monitor.activeConfiguration.Version.String())

		if cmd.Process != nil {
			pid = cmd.Process.Pid
		} else {
			logger.Error(nil, "No Process information available for subprocess")
		}

		startTime = time.Now()
		logger.Info("Subprocess started", "PID", pid)

		monitor.updateProcessID(processNumber, pid)

		// Forward the subprocess's stdout and stderr into the monitor's structured log.
		if stdout != nil {
			stdoutScanner := bufio.NewScanner(stdout)
			go func() {
				for stdoutScanner.Scan() {
					logger.Info("Subprocess output", "msg", stdoutScanner.Text(), "PID", pid)
				}
			}()
		}

		if stderr != nil {
			stderrScanner := bufio.NewScanner(stderr)
			go func() {
				for stderrScanner.Scan() {
					logger.Error(nil, "Subprocess error log", "msg", stderrScanner.Text(), "PID", pid)
				}
			}()
		}

		err = cmd.Wait()
		if err != nil {
			logger.Error(err, "Error from subprocess", "PID", pid)
		}
		exitCode := -1
		if cmd.ProcessState != nil {
			exitCode = cmd.ProcessState.ExitCode()
		}

		processDuration := time.Since(startTime)
		logger.Info("Subprocess terminated", "exitCode", exitCode, "PID", pid, "lastExecutionDurationSeconds", processDuration.String())
		monitor.updateProcessID(processNumber, -1)

		// Only back off if the exit code is non-zero.
		if exitCode != 0 {
			backoffDuration := getBackoffDuration(errorCounter)
			logger.Info("Backing off from restarting subprocess", "backoffDuration", backoffDuration.String(), "lastExecutionDurationSeconds", processDuration.String(), "errorCounter", errorCounter, "exitCode", exitCode)
			time.Sleep(backoffDuration)
			errorCounter++
		}
	}
}

// processRequired determines if the latest configuration requires that a
// process stay running.
// If the process is no longer desired, this will remove it from the process ID
// list and return false. If the process is still desired, this will return
// true.
func (monitor *monitor) processRequired(processNumber int) bool {
	monitor.mutex.Lock()
	defer monitor.mutex.Unlock()
	logger := monitor.logger.WithValues("processNumber", processNumber, "area", "processRequired")
	if monitor.processCount < processNumber || !monitor.activeConfiguration.ShouldRunServers() {
		if monitor.processIDs[processNumber] != 0 {
			logger.Info("Terminating run loop")
			monitor.processIDs[processNumber] = 0
		}

		return false
	}

	return true
}

// processIsIsolated returns true if the IsolateProcessGroupAnnotation is set to "true".
func (monitor *monitor) processIsIsolated() bool {
	if monitor.podClient.podMetadata == nil {
		return false
	}

	if monitor.podClient.podMetadata.Annotations == nil {
		return false
	}

	val, ok := monitor.podClient.podMetadata.Annotations[api.IsolateProcessGroupAnnotation]
	if !ok {
		return false
	}

	isolated, err := strconv.ParseBool(val)
	if err != nil {
		monitor.logger.Error(err, "could not parse the value of the isolate process group annotation", "annotation", api.IsolateProcessGroupAnnotation)
		return false
	}

	return isolated
}
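
// For example, setting the pod annotation named by api.IsolateProcessGroupAnnotation
// to "true" makes readConfiguration disable RunServers, so the monitor terminates
// its fdbserver processes while the pod itself keeps running.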

// updateProcessID records a new process ID from a newly launched process.
func (monitor *monitor) updateProcessID(processNumber int, pid int) {
	monitor.mutex.Lock()
	defer monitor.mutex.Unlock()
	monitor.processIDs[processNumber] = pid
}

// watchConfiguration detects changes to the monitor configuration file.
func (monitor *monitor) watchConfiguration(watcher *fsnotify.Watcher) {
	for {
		select {
		case event, ok := <-watcher.Events:
			if !ok {
				return
			}

			monitor.logger.Info("Detected event on monitor conf file or cluster file", "event", event)
			if event.Op&fsnotify.Write == fsnotify.Write || event.Op&fsnotify.Create == fsnotify.Create {
				monitor.handleFileChange(event.Name)
			} else if event.Op&fsnotify.Remove == fsnotify.Remove {
				// Kubernetes updates a mounted ConfigMap by atomically swapping a
				// symlink, which fsnotify reports as a Remove event, so re-add the
				// watch before processing the change.
				err := watcher.Add(event.Name)
				if err != nil {
					panic(err)
				}
				monitor.handleFileChange(event.Name)
			}
		case err, ok := <-watcher.Errors:
			if !ok {
				return
			}
			monitor.logger.Error(err, "Error watching for file system events")
		}
	}
}

// handleFileChange will perform the required action based on the changed/modified file.
func (monitor *monitor) handleFileChange(changedFile string) {
	if changedFile == fdbClusterFilePath {
		err := monitor.podClient.updateFdbClusterTimestampAnnotation()
		if err != nil {
			monitor.logger.Error(err, fmt.Sprintf("could not update %s annotation", api.ClusterFileChangeDetectedAnnotation))
		}
		return
	}

	monitor.loadConfiguration()
}

// sendSignalToProcesses sends the provided signal to every currently running subprocess.
func (monitor *monitor) sendSignalToProcesses(signal os.Signal) {
	for processNumber, processID := range monitor.processIDs {
		if processID <= 0 {
			continue
		}

		subprocessLogger := monitor.logger.WithValues("processNumber", processNumber, "PID", processID)
		process, err := os.FindProcess(processID)
		if err != nil {
			subprocessLogger.Error(err, "Error finding subprocess")
			continue
		}
		subprocessLogger.Info("Sending signal to subprocess", "signal", signal)
		err = process.Signal(signal)
		if err != nil {
			subprocessLogger.Error(err, "Error signaling subprocess")
		}
	}
}

// run runs the monitor loop.
func (monitor *monitor) run() {
	done := make(chan bool, 1)
	signals := make(chan os.Signal, 1)
	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)

	go func() {
		latestSignal := <-signals
		monitor.logger.Info("Received system signal", "signal", latestSignal)

		// Reset the processCount to 0 to make sure the monitor doesn't try to restart the processes.
		monitor.processCount = 0
		monitor.sendSignalToProcesses(latestSignal)

		annotations := monitor.podClient.podMetadata.Annotations
		if len(annotations) > 0 {
			delayValue, ok := annotations[api.DelayShutdownAnnotation]
			if ok {
				delay, err := time.ParseDuration(delayValue)
				if err == nil {
					time.Sleep(delay)
				}
			}
		}

		done <- true
	}()

	monitor.loadConfiguration()
	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		panic(err)
	}
	monitor.logger.Info("adding watch for file", "path", path.Base(monitor.configFile))
	err = watcher.Add(monitor.configFile)
	if err != nil {
		panic(err)
	}

	defer func(watcher *fsnotify.Watcher) {
		err := watcher.Close()
		if err != nil {
			monitor.logger.Error(err, "could not close watcher")
		}
	}(watcher)
	go func() { monitor.watchConfiguration(watcher) }()

	// The cluster file will be created and managed by the fdbserver processes, so we have to wait until the fdbserver
	// processes have been started. Except for the initial cluster creation this file should be present as soon as the
	// monitor starts the processes.
	for {
		_, err = os.Stat(fdbClusterFilePath)
		if errors.Is(err, os.ErrNotExist) {
			monitor.logger.Info("waiting for file to be created", "path", fdbClusterFilePath)
			time.Sleep(5 * time.Second)
			continue
		}

		monitor.logger.Info("adding watch for file", "path", fdbClusterFilePath)
		err = watcher.Add(fdbClusterFilePath)
		if err != nil {
			panic(err)
		}
		break
	}

	<-done
}
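
// For example, setting the pod annotation named by api.DelayShutdownAnnotation to a
// Go duration string such as "30s" makes the monitor wait that long between
// signalling its subprocesses and exiting; unparsable values are silently ignored.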

// watchPodTimestamps watches the timestamp feed to reload the configuration.
func (monitor *monitor) watchPodTimestamps() {
	for timestamp := range monitor.podClient.TimestampFeed {
		if timestamp > monitor.lastConfigurationTime.Unix() {
			monitor.loadConfiguration()
		}
	}
}