Skip to content

Commit 465dd6f

Browse files
committed
Add caching for failed deployments due to configuration errors
- Introduced a new cache to track deployments that fail due to configuration issues, preventing repeated retry attempts.
- Implemented logic to skip deployments that have previously failed due to configuration errors.
- Added a method to clear the failed deployments cache, allowing for retries of specific or all failed deployments.
1 parent fd7808e commit 465dd6f

File tree

1 file changed

+53
-2
lines changed

1 file changed

+53
-2
lines changed

litellm/router.py

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,9 @@ def __init__( # noqa: PLR0915
418418
self.failed_calls = (
419419
InMemoryCache()
420420
) # cache to track failed call per deployment, if num failed calls within 1 minute > allowed fails, then add it to cooldown
421+
self.failed_deployments = (
422+
InMemoryCache()
423+
) # cache to track deployments that failed due to configuration errors (missing credentials, etc.) to prevent repeated retry attempts
421424

422425
if num_retries is not None:
423426
self.num_retries = num_retries
@@ -4835,6 +4838,14 @@ def upsert_deployment(self, deployment: Deployment) -> Optional[Deployment]:
48354838
# check if deployment already exists
48364839
_deployment_model_id = deployment.model_info.id or ""
48374840

4841+
# Check if this deployment has previously failed due to configuration errors
4842+
_deployment_cache_key = f"failed_deployment_{_deployment_model_id}"
4843+
if self.failed_deployments.get_cache(_deployment_cache_key) is not None:
4844+
verbose_router_logger.debug(
4845+
f"Skipping deployment {_deployment_model_id} - previously failed due to configuration errors"
4846+
)
4847+
return None
4848+
48384849
_deployment_on_router: Optional[Deployment] = self.get_deployment(
48394850
model_id=_deployment_model_id
48404851
)
@@ -4859,9 +4870,29 @@ def upsert_deployment(self, deployment: Deployment) -> Optional[Deployment]:
48594870
return deployment
48604871
except Exception as e:
48614872
if self.ignore_invalid_deployments:
4862-
verbose_router_logger.warning(
4863-
f"Error upserting deployment: {e}, ignoring and continuing with other deployments."
4873+
# Check if this is a configuration error (missing credentials, etc.)
4874+
_is_config_error = (
4875+
"api_key is required" in str(e) or
4876+
"vertex_project, and vertex_location must be set" in str(e) or
4877+
"credentials" in str(e).lower() or
4878+
"authentication" in str(e).lower()
48644879
)
4880+
4881+
if _is_config_error:
4882+
# Cache this deployment as failed to prevent repeated retries
4883+
_deployment_cache_key = f"failed_deployment_{deployment.model_info.id or ''}"
4884+
self.failed_deployments.set_cache(
4885+
key=_deployment_cache_key,
4886+
value=str(e),
4887+
ttl=3600 # Cache for 1 hour
4888+
)
4889+
verbose_router_logger.warning(
4890+
f"Error upserting deployment: {e}, caching as failed deployment to prevent retries."
4891+
)
4892+
else:
4893+
verbose_router_logger.warning(
4894+
f"Error upserting deployment: {e}, ignoring and continuing with other deployments."
4895+
)
48654896
return None
48664897
else:
48674898
raise e
@@ -4917,6 +4948,26 @@ def get_deployment_credentials(self, model_id: str) -> Optional[dict]:
49174948
**deployment.litellm_params.model_dump(exclude_none=True)
49184949
).model_dump(exclude_none=True)
49194950

4951+
def clear_failed_deployments_cache(self, model_id: Optional[str] = None) -> None:
    """
    Drop entries from `self.failed_deployments` so the affected deployments
    are no longer skipped on the next upsert.

    Parameters:
    - model_id (Optional[str]): Deployment id whose entry should be removed.
      When None, the entire failed-deployments cache is flushed.
    """
    # No id given -> wipe every recorded failure in one go.
    if model_id is None:
        self.failed_deployments.flush_cache()
        verbose_router_logger.info("Cleared all failed deployment cache entries")
        return

    # Key format must match the one used when the failure was recorded
    # ("failed_deployment_<model id>").
    self.failed_deployments.delete_cache(f"failed_deployment_{model_id}")
    verbose_router_logger.info(
        f"Cleared failed deployment cache for model_id: {model_id}"
    )
4970+
49204971
def get_deployment_by_model_group_name(
49214972
self, model_group_name: str
49224973
) -> Optional[Deployment]:

0 commit comments

Comments
 (0)