11 changes: 11 additions & 0 deletions controllers/om/api/http.go
@@ -158,6 +158,17 @@ func OptionCAValidate(ca string) func(client *Client) error {
}
}

// OptionRetryConfig configures the retry behavior of the HTTP client.
// This is primarily useful in tests, to avoid waiting between retries.
func OptionRetryConfig(retryWaitMin, retryWaitMax time.Duration, retryMax int) func(client *Client) error {
return func(client *Client) error {
client.RetryWaitMin = retryWaitMin
client.RetryWaitMax = retryWaitMax
client.RetryMax = retryMax
return nil
}
}

// Request executes an HTTP request, given a series of parameters, over this *Client object.
// It handles Digest when needed and json marshaling of the `v` struct.
func (client *Client) Request(method, hostname, path string, v interface{}) ([]byte, http.Header, error) {
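A minimal usage sketch of the new option (the duration values are illustrative; NewHTTPClient is the constructor already used in getHTTPClient further down in this diff):

	// Build a client that retries up to 5 times, with backoff bounded between 100ms and 2s.
	client, err := api.NewHTTPClient(
		api.OptionRetryConfig(100*time.Millisecond, 2*time.Second, 5),
	)
	if err != nil {
		// handle the construction error
	}
	_ = client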
19 changes: 16 additions & 3 deletions controllers/om/omclient.go
@@ -179,9 +179,10 @@ type OMContext struct {
}

type HTTPOmConnection struct {
context *OMContext
once sync.Once
client *api.Client
context *OMContext
once sync.Once
client *api.Client
clientOpts []func(*api.Client) error // Additional options for the HTTP client (e.g., for testing)
}

func (oc *HTTPOmConnection) ReadUpdateAgentsLogRotation(logRotateSetting mdbv1.AgentConfig, log *zap.SugaredLogger) error {
@@ -283,6 +284,15 @@ func NewOpsManagerConnection(context *OMContext) Connection {
}
}

// NewOpsManagerConnectionWithOptions creates a connection with custom HTTP client options.
// This is primarily useful in tests that need to customize retry behavior.
func NewOpsManagerConnectionWithOptions(context *OMContext, clientOpts ...func(*api.Client) error) Connection {
return &HTTPOmConnection{
context: context,
clientOpts: clientOpts,
}
}

func (oc *HTTPOmConnection) ReadGroupBackupConfig() (backup.GroupBackupConfig, error) {
ans, apiErr := oc.get(fmt.Sprintf("/api/public/v1.0/admin/backup/groups/%s", oc.GroupID()))

@@ -1064,6 +1074,9 @@ func (oc *HTTPOmConnection) getHTTPClient() (*api.Client, error) {
opts = append(opts, api.OptionDebug)
}

// Add any custom client options (e.g., for testing)
opts = append(opts, oc.clientOpts...)

oc.client, err = api.NewHTTPClient(opts...)
})

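For reference, a sketch of how a caller could combine the new constructor with the retry option from http.go above (the OMContext field values are placeholders; the omclient_test.go change below does the same thing against a mock server):

	conn := NewOpsManagerConnectionWithOptions(
		&OMContext{BaseURL: "http://ops-manager.example:8080", GroupID: "group-1"},
		api.OptionRetryConfig(0, 0, 3), // no wait between attempts, still retry up to 3 times
	)
	_ = conn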
6 changes: 5 additions & 1 deletion controllers/om/omclient_test.go
@@ -11,6 +11,7 @@ import (
"github.com/stretchr/testify/assert"
"go.uber.org/zap"

"github.com/mongodb/mongodb-kubernetes/controllers/om/api"
"github.com/mongodb/mongodb-kubernetes/pkg/util"
)

@@ -103,7 +104,10 @@ func TestRetriesOnWritingAutomationConfig(t *testing.T) {
srv := serverMock(handleFunc)
defer srv.Close()

connection := NewOpsManagerConnection(&OMContext{BaseURL: srv.URL, GroupID: "1"})
connection := NewOpsManagerConnectionWithOptions(
&OMContext{BaseURL: srv.URL, GroupID: "1"},
api.OptionRetryConfig(0, 0, 3), // No delay between retries, still retry 3 times
)
err := connection.ReadUpdateAutomationConfig(func(ac *AutomationConfig) error {
return nil
}, logger)
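The retry path in this test is only exercised if the mock server fails the first attempts. For illustration, a flaky endpoint built with net/http/httptest and sync/atomic (this sketch does not use the repo's serverMock helper, and serverMock's actual failure pattern may differ):

	var calls int32
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if atomic.AddInt32(&calls, 1) < 3 {
			w.WriteHeader(http.StatusInternalServerError) // fail the first two calls to force retries
			return
		}
		_, _ = w.Write([]byte(`{}`)) // succeed from the third call onwards
	}))
	defer srv.Close()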
10 changes: 6 additions & 4 deletions controllers/operator/create/create.go
@@ -146,7 +146,7 @@ func HandlePVCResize(ctx context.Context, memberClient kubernetesClient.Client,
return workflow.Failed(xerrors.Errorf("error deleting sts, err: %s", err))
}

deletedIsStatefulset := checkStatefulsetIsDeleted(ctx, memberClient, desiredSts, 1*time.Second, log)
deletedIsStatefulset, _ := checkStatefulsetIsDeleted(ctx, memberClient, desiredSts, 1*time.Second, log)

if !deletedIsStatefulset {
log.Info("deletion has not been reflected in kube yet, restarting the reconcile")
@@ -164,11 +164,13 @@
return workflow.OK()
}

func checkStatefulsetIsDeleted(ctx context.Context, memberClient kubernetesClient.Client, desiredSts *appsv1.StatefulSet, sleepDuration time.Duration, log *zap.SugaredLogger) bool {
func checkStatefulsetIsDeleted(ctx context.Context, memberClient kubernetesClient.Client, desiredSts *appsv1.StatefulSet, sleepDuration time.Duration, log *zap.SugaredLogger) (bool, int) {
Review comment (PR author): returning retries makes testing easier

// After deleting the StatefulSet it can take a few seconds for the deletion to be reflected in Kubernetes.
// In case it is still not reflected, retry the check a few times before giving up.
deletedIsStatefulset := false
for i := 0; i < 3; i++ {
retries := 0
for retries < 3 {
retries += 1
time.Sleep(sleepDuration)
_, stsErr := memberClient.GetStatefulSet(ctx, kube.ObjectKey(desiredSts.Namespace, desiredSts.Name))
if apiErrors.IsNotFound(stsErr) {
@@ -178,7 +180,7 @@ func checkStatefulsetIsDeleted(ctx context.Context, memberClient kubernetesClien
log.Info("Statefulset still exists, attempting again")
}
}
return deletedIsStatefulset
return deletedIsStatefulset, retries
}

func hasFinishedResizing(ctx context.Context, memberClient kubernetesClient.Client, desiredSts *appsv1.StatefulSet) (bool, error) {
78 changes: 42 additions & 36 deletions controllers/operator/create/create_test.go
@@ -4,8 +4,8 @@ import (
"context"
"fmt"
"strings"
"sync"
"testing"
"testing/synctest"
"time"

"github.com/stretchr/testify/assert"
@@ -1397,7 +1397,7 @@ func TestGetMatchingPVCTemplateFromSTS(t *testing.T) {

func TestCheckStatefulsetIsDeleted(t *testing.T) {
ctx := context.TODO()
sleepDuration := 10 * time.Millisecond
sleepDuration := 100 * time.Millisecond
log := zap.NewNop().Sugar()

namespace := "default"
@@ -1411,52 +1411,58 @@
}

t.Run("StatefulSet is deleted", func(t *testing.T) {
fakeClient, _ := mock.NewDefaultFakeClient()
err := fakeClient.CreateStatefulSet(ctx, *desiredSts)
assert.NoError(t, err)
synctest.Test(t, func(t *testing.T) {
fakeClient, _ := mock.NewDefaultFakeClient()
err := fakeClient.CreateStatefulSet(ctx, *desiredSts)
assert.NoError(t, err)

// Simulate the deletion by deleting the StatefulSet
err = fakeClient.DeleteStatefulSet(ctx, kube.ObjectKey(desiredSts.Namespace, desiredSts.Name))
assert.NoError(t, err)
// Delete before calling the function
err = fakeClient.DeleteStatefulSet(ctx, kube.ObjectKey(desiredSts.Namespace, desiredSts.Name))
assert.NoError(t, err)

// Check if the StatefulSet is detected as deleted
result := checkStatefulsetIsDeleted(ctx, fakeClient, desiredSts, sleepDuration, log)
result, retries := checkStatefulsetIsDeleted(ctx, fakeClient, desiredSts, sleepDuration, log)

assert.True(t, result, "StatefulSet should be detected as deleted")
assert.True(t, result, "StatefulSet should be detected as deleted")
assert.Equal(t, 1, retries, "Should find deletion on first retry")
})
})

t.Run("StatefulSet is not deleted", func(t *testing.T) {
fakeClient, _ := mock.NewDefaultFakeClient()
err := fakeClient.CreateStatefulSet(ctx, *desiredSts)
assert.NoError(t, err)
t.Run("StatefulSet is not deleted and then delete it", func(t *testing.T) {
synctest.Test(t, func(t *testing.T) {
fakeClient, _ := mock.NewDefaultFakeClient()
err := fakeClient.CreateStatefulSet(ctx, *desiredSts)
assert.NoError(t, err)

// Do not delete the StatefulSet, to simulate it still existing
// Check if the StatefulSet is detected as not deleted
result := checkStatefulsetIsDeleted(ctx, fakeClient, desiredSts, sleepDuration, log)
result, retries := checkStatefulsetIsDeleted(ctx, fakeClient, desiredSts, sleepDuration, log)

assert.False(t, result, "StatefulSet should not be detected as deleted")
})
assert.False(t, result, "StatefulSet should not be detected as deleted")
assert.Equal(t, 3, retries, "Should exhaust all retries")

t.Run("StatefulSet is deleted after some retries", func(t *testing.T) {
fakeClient, _ := mock.NewDefaultFakeClient()
err := fakeClient.CreateStatefulSet(ctx, *desiredSts)
assert.NoError(t, err)

var wg sync.WaitGroup
wg.Add(1)
// Use a goroutine to delete the StatefulSet after a delay, making it race-safe
go func() {
defer wg.Done()
time.Sleep(20 * time.Millisecond) // Wait for a bit longer than the first sleep
err = fakeClient.DeleteStatefulSet(ctx, kube.ObjectKey(desiredSts.Namespace, desiredSts.Name))
assert.NoError(t, err)
}()

// Check if the StatefulSet is detected as deleted after retries
result := checkStatefulsetIsDeleted(ctx, fakeClient, desiredSts, sleepDuration, log)
result, retries = checkStatefulsetIsDeleted(ctx, fakeClient, desiredSts, sleepDuration, log)

assert.True(t, result, "StatefulSet should be detected as deleted")
assert.Equal(t, 1, retries, "Should find deletion on first retry")
})
})

t.Run("StatefulSet is deleted after second retry", func(t *testing.T) {
synctest.Test(t, func(t *testing.T) {
fakeClient, _ := mock.NewDefaultFakeClient()
err := fakeClient.CreateStatefulSet(ctx, *desiredSts)
assert.NoError(t, err)

go func() {
time.Sleep(150 * time.Millisecond)
_ = fakeClient.DeleteStatefulSet(ctx, kube.ObjectKey(desiredSts.Namespace, desiredSts.Name))
}()

wg.Wait()
result, retries := checkStatefulsetIsDeleted(ctx, fakeClient, desiredSts, sleepDuration, log)

assert.True(t, result, "StatefulSet should be detected as deleted after retries")
assert.True(t, result, "StatefulSet should be detected as deleted")
assert.Equal(t, 2, retries, "Should find deletion on second retry")
})
})
}
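The rewritten subtests wrap their bodies in testing/synctest (synctest.Test is available from Go 1.25), which runs the callback in a bubble with a fake clock: the 100ms polling sleeps and the 150ms delayed delete complete instantly yet deterministically, so asserting an exact retry count is safe. A standalone sketch of the pattern, assuming only the standard library (testing, testing/synctest, time):

	func TestFakeClockSketch(t *testing.T) {
		synctest.Test(t, func(t *testing.T) {
			start := time.Now()
			done := make(chan struct{})
			go func() {
				time.Sleep(150 * time.Millisecond) // advances the bubble's fake clock, no real waiting
				close(done)
			}()
			<-done
			// Exactly 150ms of fake time has elapsed inside the bubble.
			if time.Since(start) != 150*time.Millisecond {
				t.Fatalf("unexpected elapsed fake time: %v", time.Since(start))
			}
		})
	}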