Adds structured logging in fdb-kubernetes-monitor.
Adds a backoff window when restarting processes in fdb-kubernetes-monitor.
This commit is contained in:
parent
95ad5854be
commit
7c36123cf8
|
@ -0,0 +1 @@
|
|||
test:test@127.0.0.1:4501
|
|
@ -0,0 +1,7 @@
|
|||
export FDB_PUBLIC_IP=127.0.0.1
|
||||
export FDB_POD_IP=127.0.0.1
|
||||
export FDB_ZONE_ID=localhost
|
||||
export FDB_MACHINE_ID=localhost
|
||||
export FDB_INSTANCE_ID=storage-1
|
||||
export KUBERNETES_SERVICE_HOST=kubernetes.docker.internal
|
||||
export KUBERNETES_SERVICE_PORT=6443
|
|
@ -5,21 +5,24 @@ To test this, run the following commands from the root of the FoundationDB
|
|||
repository:
|
||||
|
||||
docker build -t foundationdb/foundationdb-kubernetes:latest --build-arg FDB_VERSION=6.3.15 --build-arg FDB_LIBRARY_VERSIONS="6.3.15 6.2.30 6.1.13" -f packaging/docker/kubernetes/Dockerfile .
|
||||
kubectl apply -f packaging/docker/kubernetes/config.yaml
|
||||
kubectl apply -f packaging/docker/kubernetes/test_config.yaml
|
||||
# Wait for the pods to become ready
|
||||
ips=$(kubectl get pod -l app=fdb-kubernetes-example -o json | jq -j '[[.items|.[]|select(.status.podIP!="")]|limit(3;.[])|.status.podIP+":4501"]|join(",")')
|
||||
cat packaging/docker/kubernetes/config.yaml | sed -e "s/fdb.cluster: \"\"/fdb.cluster: \"test:test@$ips\"/" -e "s/\"serverCount\": 0/\"serverCount\": 1/" | kubectl apply -f -
|
||||
cat packaging/docker/kubernetes/test_config.yaml | sed -e "s/fdb.cluster: \"\"/fdb.cluster: \"test:test@$ips\"/" -e "s/\"serverCount\": 0/\"serverCount\": 1/" | kubectl apply -f -
|
||||
kubectl get pod -l app=fdb-kubernetes-example -o name | xargs -I {} kubectl annotate {} foundationdb.org/outdated-config-map-seen=$(date +%s) --overwrite
|
||||
# Watch the logs for the fdb-kubernetes-example pods to confirm that they have launched the fdbserver processes.
|
||||
kubectl exec -it sts/fdb-kubernetes-example -- fdbcli --exec "configure new double ssd"
|
||||
|
||||
You can then make changes to the data in the config map and update the fdbserver processes:
|
||||
|
||||
kubectl apply -f packaging/docker/kubernetes/config.yaml
|
||||
cat packaging/docker/kubernetes/test_config.yaml | sed -e "s/fdb.cluster: \"\"/fdb.cluster: \"test:test@$ips\"/" -e "s/\"serverCount\": 0/\"serverCount\": 1/" | kubectl apply -f -
|
||||
|
||||
# You can apply an annotation to speed up the propagation of config
|
||||
kubectl get pod -l app=fdb-kubernetes-example -o name | xargs -I {} kubectl annotate {} foundationdb.org/outdated-config-map-seen=$(date +%s) --overwrite
|
||||
# Watch the logs for the fdb-kubernetes-example pods to confirm that they have launched the fdbserver processes.
|
||||
|
||||
# Watch the logs for the fdb-kubernetes-example pods to confirm that they have reloaded their configuration, and then do a bounce.
|
||||
kubectl exec -it sts/fdb-kubernetes-example -- fdbcli --exec "kill; kill all; status"
|
||||
|
||||
Once you are done, you can tear down the example with the following command:
|
||||
|
||||
kubectl delete -f packaging/docker/kubernetes/config.yaml; kubectl delete pvc -l app=fdb-kubernetes-example
|
||||
kubectl delete -f packaging/docker/kubernetes/test_config.yaml; kubectl delete pvc -l app=fdb-kubernetes-example
|
||||
|
|
|
@ -24,6 +24,9 @@ import (
|
|||
"os"
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/go-logr/zapr"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
func loadConfigFromFile(path string) (*ProcessConfiguration, error) {
|
||||
|
@ -99,4 +102,13 @@ func TestGeneratingArgumentForEnvironmentVariable(t *testing.T) {
|
|||
t.Fail()
|
||||
return
|
||||
}
|
||||
|
||||
zapLogger, err := zap.NewDevelopment()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
log := zapr.NewLogger(zapLogger)
|
||||
log.Info("JPB test", "key", "value")
|
||||
t.Fail()
|
||||
}
|
||||
|
|
|
@ -23,7 +23,10 @@ go 1.16
|
|||
|
||||
require (
|
||||
github.com/fsnotify/fsnotify v1.5.0
|
||||
github.com/go-logr/logr v0.4.0
|
||||
github.com/go-logr/zapr v0.4.0
|
||||
github.com/spf13/pflag v1.0.5
|
||||
go.uber.org/zap v1.19.0
|
||||
k8s.io/api v0.20.2
|
||||
k8s.io/apimachinery v0.20.2
|
||||
k8s.io/client-go v0.20.2
|
||||
|
|
|
@ -36,6 +36,8 @@ github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb0
|
|||
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
|
||||
github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY=
|
||||
github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8=
|
||||
github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
|
||||
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
||||
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
|
||||
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
|
||||
|
@ -62,8 +64,11 @@ github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9
|
|||
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
|
||||
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
|
||||
github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas=
|
||||
github.com/go-logr/logr v0.2.0 h1:QvGt2nLcHH0WK9orKa+ppBPAxREcH364nPUedEpK0TY=
|
||||
github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU=
|
||||
github.com/go-logr/logr v0.4.0 h1:K7/B1jt6fIBQVd4Owv2MqGQClcgf0R266+7C/QjRcLc=
|
||||
github.com/go-logr/logr v0.4.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU=
|
||||
github.com/go-logr/zapr v0.4.0 h1:uc1uML3hRYL9/ZZPdgHS/n8Nzo+eaYL/Efxkkamf7OM=
|
||||
github.com/go-logr/zapr v0.4.0/go.mod h1:tabnROwaDl0UNxkVeFRbY8bwB37GwRv0P8lg6aAiEnk=
|
||||
github.com/go-openapi/jsonpointer v0.19.2/go.mod h1:3akKfEdA7DF1sugOqz1dVQHBcuDBPKZGEoHC/NkiQRg=
|
||||
github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
|
||||
github.com/go-openapi/jsonreference v0.19.2/go.mod h1:jMjeRr2HHw6nAVajTXJ4eiUwohSTlpa0o73RUL1owJc=
|
||||
|
@ -157,6 +162,8 @@ github.com/onsi/ginkgo v1.11.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+
|
|||
github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA=
|
||||
github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
|
||||
github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU=
|
||||
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
|
@ -170,12 +177,21 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
|
|||
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
|
||||
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
|
||||
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
|
||||
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
|
||||
go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
|
||||
go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw=
|
||||
go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
|
||||
go.uber.org/goleak v1.1.10 h1:z+mqJhf6ss6BSfSM671tgKyZBFPTTJM+HLxnhPC3wu0=
|
||||
go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A=
|
||||
go.uber.org/multierr v1.6.0 h1:y6IPFStTAIT5Ytl7/XYmHvzXQ7S3g/IeZW9hyZ5thw4=
|
||||
go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU=
|
||||
go.uber.org/zap v1.19.0 h1:mZQZefskPPCMIBCSEH0v2/iUqqLrYtaeqwD6FUGUnFE=
|
||||
go.uber.org/zap v1.19.0/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||
|
@ -205,6 +221,7 @@ golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHl
|
|||
golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
|
||||
golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs=
|
||||
golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
|
||||
golang.org/x/lint v0.0.0-20200302205851-738671d3881b h1:Wh+f8QHJXR411sJR8/vRBTZ7YapZaRvUcLFFJhusH0k=
|
||||
golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
|
||||
golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE=
|
||||
golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o=
|
||||
|
@ -303,6 +320,7 @@ golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgw
|
|||
golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.0.0-20191108193012-7d206e10da11/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
|
@ -317,6 +335,7 @@ golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapK
|
|||
golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
|
||||
golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
|
||||
golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
|
||||
golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb h1:iKlO7ROJc6SttHKlxzwGytRtBUqX4VARrNTgP2YLX5M=
|
||||
golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
|
@ -388,8 +407,9 @@ gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
|||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
|
||||
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo=
|
||||
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
||||
honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
||||
honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
||||
|
|
|
@ -23,10 +23,10 @@ import (
|
|||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"strconv"
|
||||
|
||||
"github.com/go-logr/logr"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
|
@ -61,6 +61,9 @@ type PodClient struct {
|
|||
// TimestampFeed is a channel where the pod client will send updates with
|
||||
// the values from OutdatedConfigMapAnnotation.
|
||||
TimestampFeed chan int64
|
||||
|
||||
// Logger is the logger we use for this client.
|
||||
Logger logr.Logger
|
||||
}
|
||||
|
||||
// CreatePodClient creates a new client for working with the pod object.
|
||||
|
@ -155,7 +158,7 @@ func (client *PodClient) watchPod() error {
|
|||
if event.Type == watch.Modified {
|
||||
pod, valid := event.Object.(*corev1.Pod)
|
||||
if !valid {
|
||||
log.Printf("Error getting pod information from watch: %v", event)
|
||||
client.Logger.Error(nil, "Error getting pod information from watch", "event", event)
|
||||
}
|
||||
client.processPodUpdate(pod)
|
||||
}
|
||||
|
@ -177,7 +180,7 @@ func (client *PodClient) processPodUpdate(pod *corev1.Pod) {
|
|||
}
|
||||
timestamp, err := strconv.ParseInt(annotation, 10, 64)
|
||||
if err != nil {
|
||||
log.Printf("Error parsing annotation %s: %s", annotation, err)
|
||||
client.Logger.Error(err, "Error parsing annotation", "key", OutdatedConfigMapAnnotation, "rawAnnotation", annotation, err)
|
||||
return
|
||||
}
|
||||
|
||||
|
|
|
@ -22,20 +22,34 @@ package main
|
|||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-logr/zapr"
|
||||
"github.com/spf13/pflag"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
var (
|
||||
inputDir string
|
||||
fdbserverPath string
|
||||
monitorConfFile string
|
||||
logPath string
|
||||
)
|
||||
|
||||
func main() {
|
||||
pflag.StringVar(&fdbserverPath, "fdbserver-path", "/usr/bin/fdbserver", "Path to the fdbserver binary")
|
||||
pflag.StringVar(&inputDir, "input-dir", ".", "Directory containing input files")
|
||||
pflag.StringVar(&monitorConfFile, "input-monitor-conf", "config.json", "Name of the file in the input directory that contains the monitor configuration")
|
||||
pflag.StringVar(&logPath, "log-path", "", "Name of a file to send logs to. Logs will be sent to stdout in addition the file you pass in this argument. If this is blank, logs will only by sent to stdout")
|
||||
pflag.Parse()
|
||||
|
||||
StartMonitor(fmt.Sprintf("%s/%s", inputDir, monitorConfFile), fdbserverPath)
|
||||
zapConfig := zap.NewProductionConfig()
|
||||
if logPath != "" {
|
||||
zapConfig.OutputPaths = append(zapConfig.OutputPaths, logPath)
|
||||
}
|
||||
zapLogger, err := zapConfig.Build()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
logger := zapr.NewLogger(zapLogger)
|
||||
StartMonitor(logger, fmt.Sprintf("%s/%s", inputDir, monitorConfFile), fdbserverPath)
|
||||
}
|
||||
|
|
|
@ -20,9 +20,9 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"os/signal"
|
||||
|
@ -31,11 +31,14 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/fsnotify/fsnotify"
|
||||
"github.com/go-logr/logr"
|
||||
)
|
||||
|
||||
// errorBackoffSeconds is the time to wait after a process fails before starting
|
||||
// another process.
|
||||
const errorBackoffSeconds = 5
|
||||
// The delay is applied after an error generating arguments for or starting a
|
||||
// subprocess, and after a subprocess exits having run for less than this long.
|
||||
const errorBackoffSeconds = 60
|
||||
|
||||
// Monitor provides the main monitor loop
|
||||
type Monitor struct {
|
||||
|
@ -60,7 +63,7 @@ type Monitor struct {
|
|||
// zero will indicate that a process does not have a run loop. A PID of -1
|
||||
// will indicate that a process has a run loop but is not currently running
|
||||
// the subprocess.
|
||||
ProcessesIDs []int
|
||||
ProcessIDs []int
|
||||
|
||||
// Mutex defines a mutex around working with configuration.
|
||||
Mutex sync.Mutex
|
||||
|
@ -68,10 +71,13 @@ type Monitor struct {
|
|||
// PodClient is a client for posting updates about this pod to
|
||||
// Kubernetes.
|
||||
PodClient *PodClient
|
||||
|
||||
// Logger is the logger instance for this monitor.
|
||||
Logger logr.Logger
|
||||
}
|
||||
|
||||
// StartMonitor starts the monitor loop.
|
||||
func StartMonitor(configFile string, fdbserverPath string) {
|
||||
func StartMonitor(logger logr.Logger, configFile string, fdbserverPath string) {
|
||||
podClient, err := CreatePodClient()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
|
@ -81,6 +87,7 @@ func StartMonitor(configFile string, fdbserverPath string) {
|
|||
ConfigFile: configFile,
|
||||
FDBServerPath: fdbserverPath,
|
||||
PodClient: podClient,
|
||||
Logger: logger,
|
||||
}
|
||||
|
||||
go func() { monitor.WatchPodTimestamps() }()
|
||||
|
@ -91,36 +98,36 @@ func StartMonitor(configFile string, fdbserverPath string) {
|
|||
func (monitor *Monitor) LoadConfiguration() {
|
||||
file, err := os.Open(monitor.ConfigFile)
|
||||
if err != nil {
|
||||
log.Print(err.Error())
|
||||
monitor.Logger.Error(err, "Error reading monitor config file", "monitorConfigPath", monitor.ConfigFile)
|
||||
return
|
||||
}
|
||||
defer file.Close()
|
||||
configuration := &ProcessConfiguration{}
|
||||
configurationBytes, err := io.ReadAll(file)
|
||||
if err != nil {
|
||||
log.Print(err.Error())
|
||||
monitor.Logger.Error(err, "Error reading monitor configuration", "monitorConfigPath", monitor.ConfigFile)
|
||||
}
|
||||
err = json.Unmarshal(configurationBytes, configuration)
|
||||
if err != nil {
|
||||
log.Print(err)
|
||||
monitor.Logger.Error(err, "Error parsing monitor configuration", "rawConfiguration", string(configurationBytes))
|
||||
return
|
||||
}
|
||||
|
||||
_, err = configuration.GenerateArguments(1, nil)
|
||||
if err != nil {
|
||||
log.Print(err)
|
||||
monitor.Logger.Error(err, "Error generating arguments for latest configuration", "configuration", configuration)
|
||||
return
|
||||
}
|
||||
|
||||
log.Printf("Received new configuration file")
|
||||
monitor.Logger.Info("Received new configuration file", "configuration", configuration)
|
||||
monitor.Mutex.Lock()
|
||||
defer monitor.Mutex.Unlock()
|
||||
|
||||
if monitor.ProcessesIDs == nil {
|
||||
monitor.ProcessesIDs = make([]int, configuration.ServerCount+1)
|
||||
if monitor.ProcessIDs == nil {
|
||||
monitor.ProcessIDs = make([]int, configuration.ServerCount+1)
|
||||
} else {
|
||||
for len(monitor.ProcessesIDs) <= configuration.ServerCount {
|
||||
monitor.ProcessesIDs = append(monitor.ProcessesIDs, 0)
|
||||
for len(monitor.ProcessIDs) <= configuration.ServerCount {
|
||||
monitor.ProcessIDs = append(monitor.ProcessIDs, 0)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -129,8 +136,8 @@ func (monitor *Monitor) LoadConfiguration() {
|
|||
monitor.LastConfigurationTime = time.Now()
|
||||
|
||||
for processNumber := 1; processNumber <= configuration.ServerCount; processNumber++ {
|
||||
if monitor.ProcessesIDs[processNumber] == 0 {
|
||||
monitor.ProcessesIDs[processNumber] = -1
|
||||
if monitor.ProcessIDs[processNumber] == 0 {
|
||||
monitor.ProcessIDs[processNumber] = -1
|
||||
tempNumber := processNumber
|
||||
go func() { monitor.RunProcess(tempNumber) }()
|
||||
}
|
||||
|
@ -138,18 +145,20 @@ func (monitor *Monitor) LoadConfiguration() {
|
|||
|
||||
err = monitor.PodClient.UpdateAnnotations(monitor)
|
||||
if err != nil {
|
||||
log.Printf("Error updating pod annotations: %s", err)
|
||||
monitor.Logger.Error(err, "Error updating pod annotations")
|
||||
}
|
||||
}
|
||||
|
||||
// RunProcess runs a loop to continually start and watch a process.
|
||||
func (monitor *Monitor) RunProcess(processNumber int) {
|
||||
log.Printf("Starting run loop for subprocess %d", processNumber)
|
||||
pid := 0
|
||||
logger := monitor.Logger.WithValues("processNumber", processNumber, "area", "RunProcess")
|
||||
logger.Info("Starting run loop")
|
||||
for {
|
||||
monitor.Mutex.Lock()
|
||||
if monitor.ActiveConfiguration.ServerCount < processNumber {
|
||||
log.Printf("Terminating run loop for subprocess %d", processNumber)
|
||||
monitor.ProcessesIDs[processNumber] = 0
|
||||
logger.Info("Terminating run loop")
|
||||
monitor.ProcessIDs[processNumber] = 0
|
||||
monitor.Mutex.Unlock()
|
||||
return
|
||||
}
|
||||
|
@ -158,42 +167,85 @@ func (monitor *Monitor) RunProcess(processNumber int) {
|
|||
arguments, err := monitor.ActiveConfiguration.GenerateArguments(processNumber, nil)
|
||||
arguments = append([]string{monitor.FDBServerPath}, arguments...)
|
||||
if err != nil {
|
||||
log.Print(err)
|
||||
logger.Error(err, "Error generating arguments for subprocess", "configuration", monitor.ActiveConfiguration)
|
||||
time.Sleep(errorBackoffSeconds * time.Second)
|
||||
}
|
||||
cmd := exec.Cmd{
|
||||
Path: arguments[0],
|
||||
Args: arguments,
|
||||
Stdout: os.Stdout,
|
||||
Stderr: os.Stderr,
|
||||
Path: arguments[0],
|
||||
Args: arguments,
|
||||
}
|
||||
|
||||
logger.Info("Starting subprocess", "arguments", arguments)
|
||||
|
||||
stdout, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
logger.Error(err, "Error getting stdout from subprocess")
|
||||
}
|
||||
|
||||
stderr, err := cmd.StderrPipe()
|
||||
if err != nil {
|
||||
logger.Error(err, "Error getting stderr from subprocess")
|
||||
}
|
||||
|
||||
log.Printf("Starting subprocess #%d: %v", processNumber, arguments)
|
||||
err = cmd.Start()
|
||||
if err != nil {
|
||||
log.Printf("Error from subprocess %d: %s", processNumber, err.Error())
|
||||
log.Printf("Subprocess #%d will restart in %d seconds", processNumber, errorBackoffSeconds)
|
||||
logger.Error(err, "Error starting subprocess")
|
||||
time.Sleep(errorBackoffSeconds * time.Second)
|
||||
continue
|
||||
}
|
||||
|
||||
monitor.Mutex.Lock()
|
||||
monitor.ProcessesIDs[processNumber] = cmd.Process.Pid
|
||||
monitor.Mutex.Unlock()
|
||||
|
||||
err = cmd.Wait()
|
||||
log.Printf("Subprocess #%d terminated", processNumber)
|
||||
|
||||
if err != nil {
|
||||
log.Printf("Error from subprocess #%d: %s", processNumber, err.Error())
|
||||
if cmd.Process != nil {
|
||||
pid = cmd.Process.Pid
|
||||
} else {
|
||||
logger.Error(nil, "No Process information availale for subprocess")
|
||||
}
|
||||
|
||||
startTime := time.Now()
|
||||
logger.Info("Subprocess started", "PID", pid)
|
||||
|
||||
monitor.Mutex.Lock()
|
||||
monitor.ProcessesIDs[processNumber] = -1
|
||||
monitor.ProcessIDs[processNumber] = pid
|
||||
monitor.Mutex.Unlock()
|
||||
|
||||
log.Printf("Subprocess #%d will restart in %d seconds", processNumber, errorBackoffSeconds)
|
||||
time.Sleep(errorBackoffSeconds * time.Second)
|
||||
if stdout != nil {
|
||||
stdoutScanner := bufio.NewScanner(stdout)
|
||||
go func() {
|
||||
for stdoutScanner.Scan() {
|
||||
logger.Info("Subprocess output", "msg", stdoutScanner.Text(), "PID", pid)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
if stderr != nil {
|
||||
stderrScanner := bufio.NewScanner(stderr)
|
||||
go func() {
|
||||
for stderrScanner.Scan() {
|
||||
logger.Error(nil, "Subprocess error log", "msg", stderrScanner.Text(), "PID", pid)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
err = cmd.Wait()
|
||||
if err != nil {
|
||||
logger.Error(err, "Error from subprocess", "PID", pid)
|
||||
}
|
||||
exitCode := -1
|
||||
if cmd.ProcessState != nil {
|
||||
exitCode = cmd.ProcessState.ExitCode()
|
||||
}
|
||||
|
||||
logger.Info("Subprocess terminated", "exitCode", exitCode, "PID", pid)
|
||||
|
||||
endTime := time.Now()
|
||||
monitor.Mutex.Lock()
|
||||
monitor.ProcessIDs[processNumber] = -1
|
||||
monitor.Mutex.Unlock()
|
||||
|
||||
processDuration := endTime.Sub(startTime)
|
||||
if processDuration.Seconds() < errorBackoffSeconds {
|
||||
logger.Info("Backing off from restarting subprocess", "backOffTimeSeconds", errorBackoffSeconds, "lastExecutionDurationSeconds", processDuration)
|
||||
time.Sleep(errorBackoffSeconds * time.Second)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -205,7 +257,7 @@ func (monitor *Monitor) WatchConfiguration(watcher *fsnotify.Watcher) {
|
|||
if !ok {
|
||||
return
|
||||
}
|
||||
log.Printf("Detected event on monitor conf file: %v", event)
|
||||
monitor.Logger.Info("Detected event on monitor conf file", "event", event)
|
||||
if event.Op&fsnotify.Write == fsnotify.Write || event.Op&fsnotify.Create == fsnotify.Create {
|
||||
monitor.LoadConfiguration()
|
||||
} else if event.Op&fsnotify.Remove == fsnotify.Remove {
|
||||
|
@ -219,7 +271,7 @@ func (monitor *Monitor) WatchConfiguration(watcher *fsnotify.Watcher) {
|
|||
if !ok {
|
||||
return
|
||||
}
|
||||
log.Print(err)
|
||||
monitor.Logger.Error(err, "Error watching for file system events")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -232,18 +284,19 @@ func (monitor *Monitor) Run() {
|
|||
|
||||
go func() {
|
||||
latestSignal := <-signals
|
||||
log.Printf("Received signal %v", latestSignal)
|
||||
for processNumber, processID := range monitor.ProcessesIDs {
|
||||
monitor.Logger.Info("Received system signal", "signal", latestSignal)
|
||||
for processNumber, processID := range monitor.ProcessIDs {
|
||||
if processID > 0 {
|
||||
subprocessLogger := monitor.Logger.WithValues("processNumber", processNumber, "PID", processID)
|
||||
process, err := os.FindProcess(processID)
|
||||
if err != nil {
|
||||
log.Printf("Error finding subprocess #%d (PID %d): %s", processNumber, processID, err.Error())
|
||||
subprocessLogger.Error(err, "Error finding subprocess")
|
||||
continue
|
||||
}
|
||||
log.Printf("Sending signal %v to subprocess #%d (PID %d)", latestSignal, processNumber, processID)
|
||||
subprocessLogger.Info("Sending signal to subprocess", "signal", latestSignal)
|
||||
err = process.Signal(latestSignal)
|
||||
if err != nil {
|
||||
log.Printf("Error signaling subprocess #%d (PID %d): %s", processNumber, processID, err.Error())
|
||||
subprocessLogger.Error(err, "Error signaling subprocess")
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
|
|
@ -69,8 +69,15 @@ RUN rm -rf /mnt/website && rm -r /var/fdb/tmp
|
|||
# Install the kubernetes monitor binary
|
||||
COPY --from=go-build /fdb-kubernetes-monitor /usr/bin/
|
||||
|
||||
VOLUME /var/fdb/data
|
||||
# Set up a non-root user
|
||||
|
||||
RUN groupadd --gid 4059 fdb && \
|
||||
useradd --gid 4059 --uid 4059 --no-create-home --shell /bin/bash fdb && \
|
||||
chown -R fdb:fdb /var/fdb
|
||||
|
||||
# Runtime Configuration Options
|
||||
|
||||
USER fdb
|
||||
WORKDIR /var/fdb
|
||||
ENTRYPOINT ["/usr/bin/fdb-kubernetes-monitor"]
|
||||
VOLUME /var/fdb/data
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
# This is not a recommended way to run production clusters, but it can be useful
|
||||
# to test the image in development.
|
||||
#
|
||||
# For more information on using this file, see fdbkubernetesmonitor/doc.go
|
||||
# For more information on using this file, see fdbkubernetesmonitor/README.md
|
||||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
|
@ -45,6 +45,11 @@ spec:
|
|||
- name: foundationdb
|
||||
image: foundationdb/foundationdb-kubernetes:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
args:
|
||||
- --input-dir
|
||||
- /var/fdb/dynamic-conf
|
||||
- --log-path
|
||||
- /var/fdb/logs/monitor.log
|
||||
env:
|
||||
- name: FDB_POD_NAME
|
||||
valueFrom:
|
||||
|
@ -76,19 +81,20 @@ spec:
|
|||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.name
|
||||
args:
|
||||
- --input-dir
|
||||
- /var/fdb/dynamic-conf
|
||||
volumeMounts:
|
||||
- name: dynamic-conf
|
||||
mountPath: /var/fdb/dynamic-conf
|
||||
- name: data
|
||||
mountPath: /var/fdb/data
|
||||
- name: logs
|
||||
mountPath: /var/fdb/logs
|
||||
serviceAccountName: fdb-kubernetes-example
|
||||
volumes:
|
||||
- name: dynamic-conf
|
||||
configMap:
|
||||
name: fdb-kubernetes-example-config
|
||||
- name: logs
|
||||
emptyDir: {}
|
||||
volumeClaimTemplates:
|
||||
- metadata:
|
||||
name: data
|
||||
|
@ -142,7 +148,11 @@ data:
|
|||
{"type": "Environment", "source": "FDB_INSTANCE_ID"},
|
||||
{"value": "-"},
|
||||
{"type": "ProcessNumber"}
|
||||
]}
|
||||
]},
|
||||
{"value": "--logdir"},
|
||||
{"value": "/var/fdb/logs"},
|
||||
{"value": "--trace_format"},
|
||||
{"value": "json"}
|
||||
]
|
||||
}
|
||||
---
|
||||
|
@ -176,5 +186,3 @@ roleRef:
|
|||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: fdb-kubernetes-example
|
||||
|
||||
|
Loading…
Reference in New Issue