diff --git a/azureappconfiguration/azureappconfiguration.go b/azureappconfiguration/azureappconfiguration.go index b8c9d65..6a41641 100644 --- a/azureappconfiguration/azureappconfiguration.go +++ b/azureappconfiguration/azureappconfiguration.go @@ -26,6 +26,7 @@ import ( "strings" "sync" "sync/atomic" + "time" "github.com/Azure/AppConfiguration-GoProvider/azureappconfiguration/internal/jsonc" "github.com/Azure/AppConfiguration-GoProvider/azureappconfiguration/internal/refresh" @@ -140,7 +141,7 @@ func Load(ctx context.Context, authentication AuthenticationOptions, options *Op } } - if err := azappcfg.load(ctx); err != nil { + if err := azappcfg.startupWithRetry(ctx, options.StartupOptions.Timeout, azappcfg.load); err != nil { return nil, err } // Set the initial load finished flag @@ -681,6 +682,59 @@ func (azappcfg *AzureAppConfiguration) executeFailoverPolicy(ctx context.Context return fmt.Errorf("failed to get settings from all clients: %v", errors) } +// startupWithRetry implements retry logic for startup loading with timeout and exponential backoff +func (azappcfg *AzureAppConfiguration) startupWithRetry(ctx context.Context, timeout time.Duration, operation func(context.Context) error) error { + // If no timeout is specified, use the default startup timeout + if timeout <= 0 { + timeout = defaultStartupTimeout + } + + // Create a context with timeout for the entire startup process + startupCtx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + attempt := 0 + startTime := time.Now() + + for { + attempt++ + + // Try to load with the current context + err := operation(startupCtx) + if err == nil { + return nil + } + + // Check if the error is retriable + if !(isFailoverable(err) || + strings.Contains(err.Error(), "no client is available") || + strings.Contains(err.Error(), "failed to get settings from all clients")) { + return fmt.Errorf("load from Azure App Configuration failed with non-retriable error: %w", err) + } + + // Calculate backoff duration + timeElapsed := time.Since(startTime) + backoffDuration := getFixedBackoffDuration(timeElapsed) + if backoffDuration == 0 { + backoffDuration = calculateBackoffDuration(attempt) + } + + // Check if we have enough time left to wait and retry + timeRemaining := timeout - timeElapsed + if timeRemaining <= backoffDuration { + return fmt.Errorf("load from Azure App Configuration failed after %d attempts within timeout %v: %w", attempt, timeout, err) + } + + // Wait for the backoff duration before retrying + select { + case <-startupCtx.Done(): + return fmt.Errorf("load from Azure App Configuration timed out: %w", startupCtx.Err()) + case <-time.After(backoffDuration): + // Continue to next retry attempt + } + } +} + func (azappcfg *AzureAppConfiguration) trimPrefix(key string) string { result := key for _, prefix := range azappcfg.trimPrefixes { diff --git a/azureappconfiguration/client_manager.go b/azureappconfiguration/client_manager.go index e80c574..1b125fc 100644 --- a/azureappconfiguration/client_manager.go +++ b/azureappconfiguration/client_manager.go @@ -335,17 +335,17 @@ func (client *configurationClientWrapper) updateBackoffStatus(success bool) { client.backOffEndTime = time.Time{} } else { client.failedAttempts++ - client.backOffEndTime = time.Now().Add(client.getBackoffDuration()) + client.backOffEndTime = time.Now().Add(calculateBackoffDuration(client.failedAttempts)) } } -func (client *configurationClientWrapper) getBackoffDuration() time.Duration { - if client.failedAttempts <= 1 { +func calculateBackoffDuration(failedAttempts int) time.Duration { + if failedAttempts <= 1 { return minBackoffDuration } // Cap the exponent to prevent overflow - exponent := math.Min(float64(client.failedAttempts-1), float64(safeShiftLimit)) + exponent := math.Min(float64(failedAttempts-1), float64(safeShiftLimit)) calculatedMilliseconds := float64(minBackoffDuration.Milliseconds()) * math.Pow(2, exponent) if calculatedMilliseconds > float64(maxBackoffDuration.Milliseconds()) || calculatedMilliseconds <= 0 { calculatedMilliseconds = float64(maxBackoffDuration.Milliseconds()) @@ -355,6 +355,21 @@ func (client *configurationClientWrapper) getBackoffDuration() time.Duration { return jitter(calculatedDuration) } +func getFixedBackoffDuration(timeElapsed time.Duration) time.Duration { + if timeElapsed < time.Second*100 { + return time.Second * 5 + } + if timeElapsed < time.Second*200 { + return time.Second * 10 + } + + if timeElapsed < time.Second*600 { + return minBackoffDuration + } + + return 0 +} + func jitter(duration time.Duration) time.Duration { // Calculate the amount of jitter to add to the duration jitter := float64(duration) * jitterRatio diff --git a/azureappconfiguration/constants.go b/azureappconfiguration/constants.go index 5cd3c76..95ff2a1 100644 --- a/azureappconfiguration/constants.go +++ b/azureappconfiguration/constants.go @@ -69,3 +69,8 @@ const ( jitterRatio float64 = 0.25 safeShiftLimit int = 63 ) + +// Startup constants +const ( + defaultStartupTimeout time.Duration = 100 * time.Second +) diff --git a/azureappconfiguration/failover_test.go b/azureappconfiguration/failover_test.go index 665407c..2b06e05 100644 --- a/azureappconfiguration/failover_test.go +++ b/azureappconfiguration/failover_test.go @@ -9,6 +9,7 @@ import ( "fmt" "net" "net/http" + "strings" "testing" "time" @@ -664,17 +665,223 @@ func TestClientWrapper_GetBackoffDuration(t *testing.T) { // First failure should return minimum backoff duration client.failedAttempts = 1 - duration := client.getBackoffDuration() + duration := calculateBackoffDuration(client.failedAttempts) assert.True(t, duration >= minBackoffDuration) assert.True(t, duration <= minBackoffDuration*2) // Account for jitter // Multiple failures should increase duration exponentially client.failedAttempts = 3 - duration3 := client.getBackoffDuration() + duration3 := calculateBackoffDuration(client.failedAttempts) assert.True(t, duration3 > duration) // Very high failure count should be capped at max duration client.failedAttempts = 100 - durationMax := client.getBackoffDuration() + durationMax := calculateBackoffDuration(client.failedAttempts) assert.True(t, durationMax <= maxBackoffDuration*2) // Account for jitter } + +// Test startupWithRetry with successful operation on first attempt +func TestStartupWithRetry_Success_FirstAttempt(t *testing.T) { + mockClientManager := new(mockClientManager) + + azappcfg := &AzureAppConfiguration{ + clientManager: mockClientManager, + keyValues: make(map[string]any), + featureFlags: make(map[string]any), + kvSelectors: []Selector{{KeyFilter: "*", LabelFilter: "\x00"}}, + } + + // Create a successful operation that simply returns nil + operation := func(ctx context.Context) error { + return nil // Success on first attempt + } + + ctx := context.Background() + err := azappcfg.startupWithRetry(ctx, 10*time.Second, operation) + + assert.NoError(t, err) +} + +// Test startupWithRetry with retry after retriable error +func TestStartupWithRetry_Success_AfterRetriableError(t *testing.T) { + mockClientManager := new(mockClientManager) + + azappcfg := &AzureAppConfiguration{ + clientManager: mockClientManager, + keyValues: make(map[string]any), + featureFlags: make(map[string]any), + kvSelectors: []Selector{{KeyFilter: "*", LabelFilter: "\x00"}}, + } + + callCount := 0 + retriableError := &azcore.ResponseError{StatusCode: http.StatusServiceUnavailable} + + // Create an operation that fails first, then succeeds + operation := func(ctx context.Context) error { + callCount++ + if callCount == 1 { + return retriableError // Fail on first attempt + } + return nil // Success on second attempt + } + + ctx := context.Background() + err := azappcfg.startupWithRetry(ctx, 10*time.Second, operation) + + assert.NoError(t, err) + assert.Equal(t, 2, callCount, "Operation should be called twice") +} + +// Test startupWithRetry with non-retriable error +func TestStartupWithRetry_NonRetriableError(t *testing.T) { + mockClientManager := new(mockClientManager) + + azappcfg := &AzureAppConfiguration{ + clientManager: mockClientManager, + keyValues: make(map[string]any), + featureFlags: make(map[string]any), + kvSelectors: []Selector{{KeyFilter: "*", LabelFilter: "\x00"}}, + } + + callCount := 0 + nonRetriableError := &azcore.ResponseError{StatusCode: http.StatusBadRequest} + + // Create an operation that fails with non-retriable error + operation := func(ctx context.Context) error { + callCount++ + return nonRetriableError // Non-retriable error + } + + ctx := context.Background() + err := azappcfg.startupWithRetry(ctx, 10*time.Second, operation) + + assert.Error(t, err) + assert.Contains(t, err.Error(), "load from Azure App Configuration failed with non-retriable error") + assert.Equal(t, 1, callCount, "Operation should be called only once for non-retriable error") +} + +// Test startupWithRetry with timeout +func TestStartupWithRetry_Timeout(t *testing.T) { + mockClientManager := new(mockClientManager) + + azappcfg := &AzureAppConfiguration{ + clientManager: mockClientManager, + keyValues: make(map[string]any), + featureFlags: make(map[string]any), + kvSelectors: []Selector{{KeyFilter: "*", LabelFilter: "\x00"}}, + } + + callCount := 0 + retriableError := &azcore.ResponseError{StatusCode: http.StatusServiceUnavailable} + + // Create an operation that always fails with retriable error + operation := func(ctx context.Context) error { + callCount++ + return retriableError // Always fail + } + + ctx := context.Background() + // Use a very short timeout to trigger timeout quickly + err := azappcfg.startupWithRetry(ctx, 100*time.Millisecond, operation) + + assert.Error(t, err) + assert.True(t, + err.Error() == "startup timeout reached after 100ms" || + err.Error() == fmt.Sprintf("load from Azure App Configuration failed after %d attempts within timeout 100ms: %v", callCount, retriableError), + "Error should indicate timeout or max attempts reached: %v", err) + assert.True(t, callCount >= 1, "Operation should be called at least once") +} + +// Test startupWithRetry with context cancellation during backoff +func TestStartupWithRetry_ContextCancelledDuringBackoff(t *testing.T) { + mockClientManager := new(mockClientManager) + + azappcfg := &AzureAppConfiguration{ + clientManager: mockClientManager, + keyValues: make(map[string]any), + featureFlags: make(map[string]any), + kvSelectors: []Selector{{KeyFilter: "*", LabelFilter: "\x00"}}, + } + + callCount := 0 + retriableError := &azcore.ResponseError{StatusCode: http.StatusServiceUnavailable} + + // Create an operation that fails with retriable error + operation := func(ctx context.Context) error { + callCount++ + return retriableError + } + + ctx, cancel := context.WithCancel(context.Background()) + + // Cancel the context after a short delay to simulate cancellation during backoff + go func() { + time.Sleep(50 * time.Millisecond) + cancel() + }() + + err := azappcfg.startupWithRetry(ctx, 10*time.Second, operation) + + assert.Error(t, err) + assert.Contains(t, err.Error(), "load from Azure App Configuration timed out: context canceled") +} + +// Test startupWithRetry with default timeout when zero timeout provided +func TestStartupWithRetry_DefaultTimeout(t *testing.T) { + mockClientManager := new(mockClientManager) + + azappcfg := &AzureAppConfiguration{ + clientManager: mockClientManager, + keyValues: make(map[string]any), + featureFlags: make(map[string]any), + kvSelectors: []Selector{{KeyFilter: "*", LabelFilter: "\x00"}}, + } + + callCount := 0 + // Create an operation that succeeds + operation := func(ctx context.Context) error { + callCount++ + return nil + } + + ctx := context.Background() + // Pass zero timeout to test default timeout usage + err := azappcfg.startupWithRetry(ctx, 0, operation) + + assert.NoError(t, err) + assert.Equal(t, 1, callCount, "Operation should be called once") +} + +// Test startupWithRetry with insufficient time remaining for retry +func TestStartupWithRetry_InsufficientTimeForRetry(t *testing.T) { + mockClientManager := new(mockClientManager) + + azappcfg := &AzureAppConfiguration{ + clientManager: mockClientManager, + keyValues: make(map[string]any), + featureFlags: make(map[string]any), + kvSelectors: []Selector{{KeyFilter: "*", LabelFilter: "\x00"}}, + } + + callCount := 0 + retriableError := &azcore.ResponseError{StatusCode: http.StatusServiceUnavailable} + + // Create an operation that always fails + operation := func(ctx context.Context) error { + callCount++ + // Add some delay to consume time + time.Sleep(50 * time.Millisecond) + return retriableError + } + + ctx := context.Background() + // Use a short timeout that will be consumed by the first failure and not allow retry + err := azappcfg.startupWithRetry(ctx, 80*time.Millisecond, operation) + + assert.Error(t, err) + assert.True(t, + err.Error() == "startup timeout reached after 80ms" || + strings.Contains(err.Error(), "load from Azure App Configuration failed after") && strings.Contains(err.Error(), "attempts within timeout"), + "Error should indicate timeout or insufficient time: %v", err) + assert.True(t, callCount >= 1, "Operation should be called at least once") +} diff --git a/azureappconfiguration/options.go b/azureappconfiguration/options.go index f96ec9e..d1f0533 100644 --- a/azureappconfiguration/options.go +++ b/azureappconfiguration/options.go @@ -43,6 +43,9 @@ type Options struct { // LoadBalancingEnabled specifies whether to enable load balancing across multiple replicas of the Azure App Configuration service. // It defaults to false. LoadBalancingEnabled bool + + // StartupOptions is used when initially loading data into the configuration provider. + StartupOptions StartupOptions } // AuthenticationOptions contains parameters for authenticating with the Azure App Configuration service. @@ -159,3 +162,9 @@ type ConstructionOptions struct { // If not provided, the default separator "." will be used. Separator string } + +// StartupOptions is used when initially loading data into the configuration provider. +type StartupOptions struct { + // Timeout specifies the amount of time allowed to load data from Azure App Configuration on startup. + Timeout time.Duration +}