Skip to content

Commit dbc70eb

Browse files
Mike Morgan and claude committed
feat: Add smart retry logic with exponential backoff
Implements Issue #43 - Smart Retry Logic with Exponential Backoff Features: - RetryConfig dataclass with configurable parameters - RetryManager class for executing operations with retry logic - Multiple backoff strategies: exponential, linear, constant, fibonacci - Configurable jitter to prevent thundering herd problem - @retry decorator for easy function decoration - Preset configurations for network, API, and apt operations - Comprehensive test suite (33 tests) The module provides robust retry mechanisms for: - Network operations with transient failures - LLM API calls with rate limiting - Package manager operations with lock file issues 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <[email protected]>
1 parent f18bc09 commit dbc70eb

File tree

2 files changed

+761
-0
lines changed

2 files changed

+761
-0
lines changed

cortex/retry.py

Lines changed: 327 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,327 @@
1+
"""Smart retry logic with exponential backoff for Cortex operations.
2+
3+
This module provides robust retry mechanisms for network operations,
4+
API calls, and package installations that may fail transiently.
5+
6+
Implements Issue #43: Smart Retry Logic with Exponential Backoff
7+
"""
8+
9+
import time
10+
import random
11+
import logging
12+
import functools
13+
from typing import Callable, TypeVar, Optional, Tuple, Type, Union, List
14+
from dataclasses import dataclass, field
15+
from enum import Enum
16+
17+
logger = logging.getLogger(__name__)
18+
19+
T = TypeVar('T')
20+
21+
22+
class RetryStrategy(Enum):
    """Backoff schedules a retry manager can apply between attempts.

    Each member's value is the lowercase string form of its name, so a
    member can be looked up from text with ``RetryStrategy("linear")``.
    """

    # Delay is multiplied by a fixed base for every further attempt.
    EXPONENTIAL = "exponential"
    # Delay grows by one base_delay per attempt.
    LINEAR = "linear"
    # Delay stays at base_delay for every attempt.
    CONSTANT = "constant"
    # Delay is base_delay scaled by successive Fibonacci numbers.
    FIBONACCI = "fibonacci"
28+
29+
30+
@dataclass
class RetryConfig:
    """Configuration for retry behavior.

    Attributes:
        max_attempts: Total number of attempts, counting the initial call.
        base_delay: Base delay in seconds that the strategy scales per attempt.
        max_delay: Upper bound in seconds for the strategy-computed delay.
        exponential_base: Growth factor used by the exponential strategy.
        jitter: Whether to randomize each delay to avoid thundering herds.
        jitter_range: Jitter amplitude as a fraction of the delay (0.0 to 1.0).
        strategy: Backoff strategy used to compute delays.
        retryable_exceptions: Exception types that trigger another attempt;
            anything else propagates to the caller immediately.

    Raises:
        ValueError: If any field is outside its allowed range.
    """
    max_attempts: int = 3
    base_delay: float = 1.0
    max_delay: float = 60.0
    exponential_base: float = 2.0
    jitter: bool = True
    jitter_range: float = 0.25
    strategy: RetryStrategy = RetryStrategy.EXPONENTIAL
    retryable_exceptions: Tuple[Type[Exception], ...] = (Exception,)

    def __post_init__(self):
        """Validate field ranges so bad settings fail at construction time."""
        if self.max_attempts < 1:
            raise ValueError("max_attempts must be at least 1")
        if self.base_delay < 0:
            raise ValueError("base_delay must be non-negative")
        if self.max_delay < self.base_delay:
            raise ValueError("max_delay must be >= base_delay")
        # A zero or negative base yields zero/negative exponential delays; a
        # negative delay would make time.sleep() raise in the middle of a
        # retry loop, so reject it here instead.
        if self.exponential_base <= 0:
            raise ValueError("exponential_base must be positive")
        if not 0 <= self.jitter_range <= 1:
            raise ValueError("jitter_range must be between 0 and 1")
62+
63+
64+
@dataclass
class RetryResult:
    """Outcome record for one retried operation.

    Attributes:
        success: True when some attempt returned without a retryable error.
        result: Return value of the successful attempt; None on failure.
        attempts: How many attempts were made.
        total_time: Elapsed seconds for the whole operation, delays included.
        errors: Every retryable exception caught, in the order raised.
        final_error: The last exception when the operation failed, else None.
    """
    success: bool
    result: Optional[T] = None
    attempts: int = 0
    total_time: float = 0.0
    # default_factory gives each instance its own list instead of a shared one.
    errors: List[Exception] = field(default_factory=list)
    final_error: Optional[Exception] = None
82+
83+
84+
class RetryManager:
    """Run callables with retries, sleeping between attempts per a backoff strategy.

    The attempt budget, delay schedule and the exception types that count as
    retryable all come from the ``RetryConfig`` stored in ``self.config``.
    """

    # Multipliers for the FIBONACCI strategy, indexed by 0-based attempt
    # number. Attempts past the end of the table reuse the last entry.
    _FIBONACCI = [1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144]

    def __init__(self, config: Optional[RetryConfig] = None):
        """Initialize retry manager with configuration.

        Args:
            config: RetryConfig instance, uses defaults if None
        """
        self.config = config or RetryConfig()

    def _calculate_delay(self, attempt: int) -> float:
        """Calculate the delay to wait after a failed attempt.

        The strategy-specific delay is capped at ``max_delay`` first and
        jitter is applied afterwards, so a jittered delay may exceed
        ``max_delay`` by up to ``jitter_range`` of its value.

        Args:
            attempt: The attempt number (0-indexed)

        Returns:
            Delay in seconds, never negative
        """
        cfg = self.config
        strategy = cfg.strategy

        if strategy == RetryStrategy.CONSTANT:
            delay = cfg.base_delay

        elif strategy == RetryStrategy.LINEAR:
            delay = cfg.base_delay * (attempt + 1)

        elif strategy == RetryStrategy.FIBONACCI:
            fib_index = min(attempt, len(self._FIBONACCI) - 1)
            delay = cfg.base_delay * self._FIBONACCI[fib_index]

        else:  # EXPONENTIAL (default)
            try:
                delay = cfg.base_delay * (cfg.exponential_base ** attempt)
            except OverflowError:
                # The power no longer fits in a float for very large attempt
                # numbers; the delay would be capped anyway, so saturate
                # instead of crashing the retry loop.
                delay = cfg.max_delay if cfg.base_delay > 0 else 0.0

        # Apply max delay cap
        delay = min(delay, cfg.max_delay)

        # Apply jitter if enabled
        if cfg.jitter:
            jitter_amount = delay * cfg.jitter_range
            delay += random.uniform(-jitter_amount, jitter_amount)

        # time.sleep() rejects negative values, so clamp whether or not
        # jitter was applied.
        return max(0.0, delay)

    def execute(
        self,
        func: Callable[..., T],
        *args,
        on_retry: Optional[Callable[[int, Exception, float], None]] = None,
        **kwargs
    ) -> RetryResult:
        """Execute a function with retry logic.

        Exceptions that are not instances of ``config.retryable_exceptions``
        are not caught and propagate to the caller immediately.

        Args:
            func: The function to execute
            *args: Positional arguments for the function
            on_retry: Optional callback called before each retry with
                (attempt_number, exception, delay); attempt_number is 1-based
            **kwargs: Keyword arguments for the function

        Returns:
            RetryResult containing success status and result or errors
        """
        # Monotonic clock: elapsed time must not be skewed by wall-clock
        # adjustments that happen while we sleep between attempts.
        started = time.monotonic()
        errors: List[Exception] = []
        max_attempts = self.config.max_attempts

        for attempt in range(max_attempts):
            try:
                result = func(*args, **kwargs)
            except self.config.retryable_exceptions as exc:
                errors.append(exc)

                if attempt >= max_attempts - 1:
                    logger.error(
                        "All %d attempts failed. Final error: %s",
                        max_attempts,
                        exc,
                    )
                    break

                delay = self._calculate_delay(attempt)

                logger.warning(
                    "Attempt %d/%d failed: %s. Retrying in %.2fs...",
                    attempt + 1,
                    max_attempts,
                    exc,
                    delay,
                )

                if on_retry:
                    on_retry(attempt + 1, exc, delay)

                time.sleep(delay)
            else:
                return RetryResult(
                    success=True,
                    result=result,
                    attempts=attempt + 1,
                    total_time=time.monotonic() - started,
                    errors=errors
                )

        return RetryResult(
            success=False,
            attempts=max_attempts,
            total_time=time.monotonic() - started,
            errors=errors,
            final_error=errors[-1] if errors else None
        )
192+
193+
194+
def retry(
    max_attempts: int = 3,
    base_delay: float = 1.0,
    max_delay: float = 60.0,
    exponential_base: float = 2.0,
    jitter: bool = True,
    strategy: RetryStrategy = RetryStrategy.EXPONENTIAL,
    retryable_exceptions: Tuple[Type[Exception], ...] = (Exception,),
    on_retry: Optional[Callable[[int, Exception, float], None]] = None,
    jitter_range: float = 0.25
):
    """Decorator for adding retry logic to functions.

    The configuration is built and validated once, when the decorator is
    evaluated. The decorated function returns its normal value on success
    and re-raises the last caught exception once all attempts are used up.

    Args:
        max_attempts: Maximum number of attempts
        base_delay: Initial delay in seconds
        max_delay: Maximum delay cap
        exponential_base: Base for exponential backoff
        jitter: Whether to add random jitter
        strategy: Retry strategy to use
        retryable_exceptions: Exception types that trigger retry
        on_retry: Callback for retry events, called with
            (attempt_number, exception, delay) before each retry
        jitter_range: Jitter amplitude as a fraction of the delay (0.0 to 1.0)

    Returns:
        Decorated function with retry logic

    Raises:
        ValueError: If the arguments do not form a valid RetryConfig

    Example:
        @retry(max_attempts=3, base_delay=1.0)
        def fetch_packages():
            return requests.get("https://api.example.com/packages")
    """
    config = RetryConfig(
        max_attempts=max_attempts,
        base_delay=base_delay,
        max_delay=max_delay,
        exponential_base=exponential_base,
        jitter=jitter,
        jitter_range=jitter_range,
        strategy=strategy,
        retryable_exceptions=retryable_exceptions
    )
    manager = RetryManager(config)

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        @functools.wraps(func)
        def wrapper(*args, **kwargs) -> T:
            # NOTE: ``on_retry`` is reserved by execute(); a wrapped function
            # cannot itself receive a keyword argument with that name.
            outcome = manager.execute(func, *args, on_retry=on_retry, **kwargs)

            if outcome.success:
                return outcome.result

            # Surface the failure exactly as the wrapped function raised it.
            raise outcome.final_error

        return wrapper

    return decorator
248+
249+
250+
# Ready-made configurations for the common operation types.

# Network requests: up to five attempts, jittered exponential backoff
# starting at 1s and capped at 30s.
NETWORK_RETRY_CONFIG = RetryConfig(
    strategy=RetryStrategy.EXPONENTIAL,
    max_attempts=5,
    base_delay=1.0,
    max_delay=30.0,
    jitter=True,
)

# API calls: up to three attempts, jittered exponential backoff starting
# at 0.5s and capped at 10s.
API_RETRY_CONFIG = RetryConfig(
    strategy=RetryStrategy.EXPONENTIAL,
    max_attempts=3,
    base_delay=0.5,
    max_delay=10.0,
    jitter=True,
)

# apt operations: up to three attempts, exponential backoff starting at 2s
# and capped at 60s.
APT_RETRY_CONFIG = RetryConfig(
    strategy=RetryStrategy.EXPONENTIAL,
    max_attempts=3,
    base_delay=2.0,
    max_delay=60.0,
    jitter=False,  # No jitter for apt operations
)
274+
275+
276+
def retry_apt_operation(func: Callable[..., T], *args, **kwargs) -> RetryResult:
    """Run an apt operation under the APT_RETRY_CONFIG preset.

    A fresh RetryManager is built for each call. All arguments are handed
    to ``RetryManager.execute``, so an ``on_retry`` keyword is consumed by
    the manager as its retry callback and is not passed to ``func``.

    Args:
        func: The apt operation function
        *args: Positional arguments forwarded to ``func``
        **kwargs: Keyword arguments forwarded to ``func``

    Returns:
        RetryResult with operation outcome
    """
    return RetryManager(APT_RETRY_CONFIG).execute(func, *args, **kwargs)
292+
293+
294+
def retry_api_call(func: Callable[..., T], *args, **kwargs) -> RetryResult:
    """Run an API call under the API_RETRY_CONFIG preset.

    A fresh RetryManager is built for each call. All arguments are handed
    to ``RetryManager.execute``, so an ``on_retry`` keyword is consumed by
    the manager as its retry callback and is not passed to ``func``.

    Args:
        func: The API call function
        *args: Positional arguments forwarded to ``func``
        **kwargs: Keyword arguments forwarded to ``func``

    Returns:
        RetryResult with operation outcome
    """
    return RetryManager(API_RETRY_CONFIG).execute(func, *args, **kwargs)
310+
311+
312+
def retry_network_operation(func: Callable[..., T], *args, **kwargs) -> RetryResult:
    """Run a network operation under the NETWORK_RETRY_CONFIG preset.

    A fresh RetryManager is built for each call. All arguments are handed
    to ``RetryManager.execute``, so an ``on_retry`` keyword is consumed by
    the manager as its retry callback and is not passed to ``func``.

    Args:
        func: The network operation function
        *args: Positional arguments forwarded to ``func``
        **kwargs: Keyword arguments forwarded to ``func``

    Returns:
        RetryResult with operation outcome
    """
    return RetryManager(NETWORK_RETRY_CONFIG).execute(func, *args, **kwargs)

0 commit comments

Comments
 (0)