Skip to content

Commit 0e0bb40

Browse files
authored
feat: Configure proxy container for graceful termination. (#425)
Add the following configuration to the workload pods so that the proxy can gracefully exit when the main container is done. Configure the proxy container to exit 0 when it is terminated by a SIGTERM sent to the proxy container. We want the proxy to exit with code 0, indicating a clean termination. Without this change the proxy container would exit with exit code 143 (meaning terminated by SIGTERM), which would cause kubernetes to report the pod as "exited in an error state." Configure a workload lifecycle handler so that kubernetes calls GET /quitquitquit before terminating the proxy container. This should give the proxy container the chance to exit gracefully before kubernetes sends a SIGTERM to the proxy process. Always enable the /quitquitquit proxy api. Always set the CSQL_QUIT_URLS environment variable on workload pods to a space-separated list of proxy quitquitquit urls. This way, when the main workload container is ready to exit, it can tell the proxy containers to shut down. When a job or cronjob's main process is done, that container can iterate over the urls in CSQL_QUIT_URLS and send a request to each one: ``` echo Starting job # execute the job process run_job # Tell proxy containers to shut down gracefully for url in $CSQL_QUIT_URLS ; do wget --post-data '' $url done ``` Fixes #361
1 parent dc06ceb commit 0e0bb40

File tree

3 files changed

+168
-15
lines changed

3 files changed

+168
-15
lines changed

internal/testhelpers/resources.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,13 @@ func BuildJob(name types.NamespacedName, appLabel string) *batchv1.Job {
295295
},
296296
}
297297
job.Spec.Template.Spec.RestartPolicy = corev1.RestartPolicyNever
298+
podCmd := fmt.Sprintf("echo Container 1 is Running \n"+
299+
"sleep %d \n"+
300+
"for url in $CSQL_QUIT_URLS ; do \n"+
301+
" wget --post-data '' $url \n"+
302+
"done", 30)
303+
job.Spec.Template.Spec.Containers[0].Command = []string{"sh", "-c", podCmd}
304+
298305
return job
299306
}
300307

@@ -322,6 +329,12 @@ func BuildCronJob(name types.NamespacedName, appLabel string) *batchv1.CronJob {
322329
},
323330
}
324331
job.Spec.JobTemplate.Spec.Template.Spec.RestartPolicy = corev1.RestartPolicyNever
332+
podCmd := fmt.Sprintf("echo Container 1 is Running \n"+
333+
"sleep %d \n"+
334+
"for url in $CSQL_QUIT_URLS ; do \n"+
335+
" wget --post-data '' $url \n"+
336+
"done", 30)
337+
job.Spec.JobTemplate.Spec.Template.Spec.Containers[0].Command = []string{"sh", "-c", podCmd}
325338
return job
326339

327340
}
@@ -694,6 +707,10 @@ func (cc *TestCaseClient) ConfigureResources(proxy *cloudsqlapi.AuthProxyWorkloa
694707
corev1.ResourceCPU: *resource.NewMilliQuantity(500, resource.DecimalExponent),
695708
},
696709
},
710+
AdminServer: &cloudsqlapi.AdminServerSpec{
711+
Port: 9092,
712+
EnableAPIs: []string{"QuitQuitQuit"},
713+
},
697714
}
698715
}
699716

internal/workload/podspec_updates.go

Lines changed: 52 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,9 @@ const (
4949
// and kubernetes health checks.
5050
DefaultHealthCheckPort int32 = 9801
5151

52-
// DefaultAdminPort is the used by the proxy to expose prometheus
53-
// and kubernetes health checks.
54-
DefaultAdminPort int32 = 9802
52+
// DefaultAdminPort is the port used by the proxy to expose the quitquitquit
53+
// and debug api endpoints
54+
DefaultAdminPort int32 = 9091
5555
)
5656

5757
var l = logf.Log.WithName("internal.workload")
@@ -308,6 +308,7 @@ type workloadMods struct {
308308
EnvVars []*managedEnvVar `json:"envVars"`
309309
VolumeMounts []*managedVolume `json:"volumeMounts"`
310310
Ports []*managedPort `json:"ports"`
311+
AdminPorts []int32 `json:"adminPorts"`
311312
}
312313

313314
func (s *updateState) addWorkloadPort(p int32) {
@@ -394,6 +395,24 @@ func (s *updateState) useInstancePort(p *cloudsqlapi.AuthProxyWorkload, is *clou
394395
return port
395396
}
396397

398+
func (s *updateState) addAdminPort(p int32) {
399+
s.mods.AdminPorts = append(s.mods.AdminPorts, p)
400+
}
401+
402+
func (s *updateState) addQuitEnvVar() {
403+
urls := make([]string, len(s.mods.AdminPorts))
404+
for i := 0; i < len(s.mods.AdminPorts); i++ {
405+
urls[i] = fmt.Sprintf("http://localhost:%d/quitquitquit", s.mods.AdminPorts[i])
406+
}
407+
v := strings.Join(urls, " ")
408+
409+
s.addEnvVar(nil, managedEnvVar{
410+
OperatorManagedValue: corev1.EnvVar{
411+
Name: "CSQL_QUIT_URLS",
412+
Value: v,
413+
}})
414+
}
415+
397416
func (s *updateState) addPort(p int32, instance proxyInstanceID) {
398417
var mp *managedPort
399418

@@ -528,6 +547,8 @@ func (s *updateState) update(wl *PodWorkload, matches []*cloudsqlapi.AuthProxyWo
528547
k, v := s.updater.PodAnnotation(inst)
529548
ann[k] = v
530549
}
550+
// Add the envvar containing the proxy quit urls to the workloads
551+
s.addQuitEnvVar()
531552

532553
podSpec.Containers = containers
533554

@@ -778,8 +799,9 @@ func (s *updateState) updateContainerEnv(c *corev1.Container) {
778799
}
779800

780801
// addHealthCheck adds the health check declaration to this workload.
781-
func (s *updateState) addHealthCheck(p *cloudsqlapi.AuthProxyWorkload, c *corev1.Container) {
802+
func (s *updateState) addHealthCheck(p *cloudsqlapi.AuthProxyWorkload, c *corev1.Container) int32 {
782803
var portPtr *int32
804+
var adminPortPtr *int32
783805

784806
cs := p.Spec.AuthProxyContainer
785807

@@ -815,6 +837,32 @@ func (s *updateState) addHealthCheck(p *cloudsqlapi.AuthProxyWorkload, c *corev1
815837
s.addProxyContainerEnvVar(p, "CSQL_PROXY_HTTP_PORT", fmt.Sprintf("%d", port))
816838
s.addProxyContainerEnvVar(p, "CSQL_PROXY_HTTP_ADDRESS", "0.0.0.0")
817839
s.addProxyContainerEnvVar(p, "CSQL_PROXY_HEALTH_CHECK", "true")
840+
// For graceful exits as a sidecar, the proxy should exit with exit code 0
841+
// when it receives a SIGTERM.
842+
s.addProxyContainerEnvVar(p, "CSQL_PROXY_EXIT_ZERO_ON_SIGTERM", "true")
843+
844+
// Also the operator will enable the /quitquitquit endpoint for graceful exit.
845+
// If the AdminServer.Port is set, use it, otherwise use the default
846+
// admin port.
847+
if cs != nil && cs.AdminServer != nil && cs.AdminServer.Port != 0 {
848+
adminPortPtr = &cs.AdminServer.Port
849+
}
850+
adminPort := s.usePort(adminPortPtr, DefaultAdminPort, p)
851+
s.addAdminPort(adminPort)
852+
s.addProxyContainerEnvVar(p, "CSQL_PROXY_QUITQUITQUIT", "true")
853+
s.addProxyContainerEnvVar(p, "CSQL_PROXY_ADMIN_PORT", fmt.Sprintf("%d", adminPort))
854+
855+
// Configure the pre-stop hook for /quitquitquit
856+
c.Lifecycle = &corev1.Lifecycle{
857+
PreStop: &corev1.LifecycleHandler{
858+
HTTPGet: &corev1.HTTPGetAction{
859+
Port: intstr.IntOrString{IntVal: adminPort},
860+
Path: "/quitquitquit",
861+
Host: "localhost",
862+
},
863+
},
864+
}
865+
return adminPort
818866
}
819867

820868
func (s *updateState) addAdminServer(p *cloudsqlapi.AuthProxyWorkload) {
@@ -824,14 +872,10 @@ func (s *updateState) addAdminServer(p *cloudsqlapi.AuthProxyWorkload) {
824872
}
825873

826874
cs := p.Spec.AuthProxyContainer.AdminServer
827-
s.addProxyPort(cs.Port, p)
828-
s.addProxyContainerEnvVar(p, "CSQL_PROXY_ADMIN_PORT", fmt.Sprintf("%d", cs.Port))
829875
for _, name := range cs.EnableAPIs {
830876
switch name {
831877
case "Debug":
832878
s.addProxyContainerEnvVar(p, "CSQL_PROXY_DEBUG", "true")
833-
case "QuitQuitQuit":
834-
s.addProxyContainerEnvVar(p, "CSQL_PROXY_QUITQUITQUIT", "true")
835879
}
836880
}
837881

internal/workload/podspec_updates_test.go

Lines changed: 99 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"fmt"
1919
"reflect"
2020
"strconv"
21+
"strings"
2122
"testing"
2223
"time"
2324

@@ -535,11 +536,14 @@ func TestProxyCLIArgs(t *testing.T) {
535536
}},
536537
},
537538
wantWorkloadEnv: map[string]string{
538-
"CSQL_PROXY_STRUCTURED_LOGS": "true",
539-
"CSQL_PROXY_HEALTH_CHECK": "true",
540-
"CSQL_PROXY_HTTP_PORT": fmt.Sprintf("%d", workload.DefaultHealthCheckPort),
541-
"CSQL_PROXY_HTTP_ADDRESS": "0.0.0.0",
542-
"CSQL_PROXY_USER_AGENT": "cloud-sql-proxy-operator/dev",
539+
"CSQL_PROXY_STRUCTURED_LOGS": "true",
540+
"CSQL_PROXY_HEALTH_CHECK": "true",
541+
"CSQL_PROXY_QUITQUITQUIT": "true",
542+
"CSQL_PROXY_EXIT_ZERO_ON_SIGTERM": "true",
543+
"CSQL_PROXY_HTTP_PORT": fmt.Sprintf("%d", workload.DefaultHealthCheckPort),
544+
"CSQL_PROXY_HTTP_ADDRESS": "0.0.0.0",
545+
"CSQL_PROXY_USER_AGENT": "cloud-sql-proxy-operator/dev",
546+
"CSQL_PROXY_ADMIN_PORT": fmt.Sprintf("%d", workload.DefaultAdminPort),
543547
},
544548
},
545549
{
@@ -691,7 +695,7 @@ func TestProxyCLIArgs(t *testing.T) {
691695
},
692696
},
693697
{
694-
desc: "No admin port enabled when AdminServerSpec is nil",
698+
desc: "Default admin port enabled when AdminServerSpec is nil",
695699
proxySpec: cloudsqlapi.AuthProxyWorkloadSpec{
696700
AuthProxyContainer: &cloudsqlapi.AuthProxyContainerSpec{},
697701
Instances: []cloudsqlapi.InstanceSpec{{
@@ -704,8 +708,9 @@ func TestProxyCLIArgs(t *testing.T) {
704708
},
705709
wantWorkloadEnv: map[string]string{
706710
"CSQL_PROXY_HEALTH_CHECK": "true",
711+
"CSQL_PROXY_ADMIN_PORT": fmt.Sprintf("%d", workload.DefaultAdminPort),
707712
},
708-
dontWantEnvSet: []string{"CSQL_PROXY_DEBUG", "CSQL_PROXY_ADMIN_PORT"},
713+
dontWantEnvSet: []string{"CSQL_PROXY_DEBUG"},
709714
},
710715
{
711716
desc: "port conflict with other instance causes error",
@@ -911,6 +916,93 @@ func TestPodTemplateAnnotations(t *testing.T) {
911916

912917
}
913918

919+
func TestQuitURLEnvVar(t *testing.T) {
920+
921+
var (
922+
u = workload.NewUpdater("cloud-sql-proxy-operator/dev", workload.DefaultProxyImage)
923+
)
924+
925+
// Create a pod
926+
wl := podWorkload()
927+
wl.Pod.Spec.Containers[0].Ports =
928+
[]corev1.ContainerPort{{Name: "http", ContainerPort: 8080}}
929+
930+
// Create a AuthProxyWorkload that matches the deployment
931+
csqls := []*cloudsqlapi.AuthProxyWorkload{
932+
simpleAuthProxy("instance1", "project:server:db"),
933+
simpleAuthProxy("instance2", "project:server2:db2"),
934+
simpleAuthProxy("instance3", "project:server3:db3")}
935+
936+
csqls[0].ObjectMeta.Generation = 1
937+
csqls[1].ObjectMeta.Generation = 2
938+
csqls[2].ObjectMeta.Generation = 3
939+
940+
var wantQuitURLSEnv = strings.Join(
941+
[]string{
942+
fmt.Sprintf("http://localhost:%d/quitquitquit", workload.DefaultAdminPort),
943+
fmt.Sprintf("http://localhost:%d/quitquitquit", workload.DefaultAdminPort+1),
944+
fmt.Sprintf("http://localhost:%d/quitquitquit", workload.DefaultAdminPort+2),
945+
},
946+
" ",
947+
)
948+
949+
// update the containers
950+
err := configureProxies(u, wl, csqls)
951+
if err != nil {
952+
t.Fatal(err)
953+
}
954+
955+
// test that envvar was set
956+
ev, err := findEnvVar(wl, "busybox", "CSQL_QUIT_URLS")
957+
if err != nil {
958+
t.Fatal("can't find env var", err)
959+
}
960+
if ev.Value != wantQuitURLSEnv {
961+
t.Fatal("got", ev.Value, "want", wantQuitURLSEnv)
962+
}
963+
}
964+
965+
func TestPreStopHook(t *testing.T) {
966+
967+
var u = workload.NewUpdater("cloud-sql-proxy-operator/dev", workload.DefaultProxyImage)
968+
969+
// Create a pod
970+
wl := podWorkload()
971+
wl.Pod.Spec.Containers[0].Ports =
972+
[]corev1.ContainerPort{{Name: "http", ContainerPort: 8080}}
973+
974+
// Create a AuthProxyWorkload that matches the deployment
975+
csqls := []*cloudsqlapi.AuthProxyWorkload{
976+
simpleAuthProxy("instance1", "project:server:db")}
977+
978+
csqls[0].ObjectMeta.Generation = 1
979+
980+
// update the containers
981+
err := configureProxies(u, wl, csqls)
982+
if err != nil {
983+
t.Fatal(err)
984+
}
985+
986+
// test that prestop hook was set
987+
c, err := findContainer(wl, workload.ContainerName(csqls[0]))
988+
if err != nil {
989+
t.Fatal("can't find proxy container", err)
990+
}
991+
if c.Lifecycle.PreStop == nil || c.Lifecycle.PreStop.HTTPGet == nil {
992+
t.Fatal("got nil, want lifecycle.preStop.HTTPGet")
993+
}
994+
get := c.Lifecycle.PreStop.HTTPGet
995+
if get.Port.IntVal != workload.DefaultAdminPort {
996+
t.Error("got", get.Port, "want", workload.DefaultAdminPort)
997+
}
998+
if get.Path != "/quitquitquit" {
999+
t.Error("got", get.Path, "want", "/quitquitquit")
1000+
}
1001+
if get.Host != "localhost" {
1002+
t.Error("got", get.Host, "want", "localhost")
1003+
}
1004+
}
1005+
9141006
func TestPodAnnotation(t *testing.T) {
9151007
now := metav1.Now()
9161008
server := &cloudsqlapi.AuthProxyWorkload{ObjectMeta: metav1.ObjectMeta{Name: "instance1", Generation: 1}}

0 commit comments

Comments
 (0)