@@ -418,6 +418,9 @@ def __init__( # noqa: PLR0915
418
418
self .failed_calls = (
419
419
InMemoryCache ()
420
420
) # cache to track failed call per deployment, if num failed calls within 1 minute > allowed fails, then add it to cooldown
421
+ self .failed_deployments = (
422
+ InMemoryCache ()
423
+ ) # cache to track deployments that failed due to configuration errors (missing credentials, etc.) to prevent repeated retry attempts
421
424
422
425
if num_retries is not None :
423
426
self .num_retries = num_retries
@@ -4835,6 +4838,14 @@ def upsert_deployment(self, deployment: Deployment) -> Optional[Deployment]:
4835
4838
# check if deployment already exists
4836
4839
_deployment_model_id = deployment .model_info .id or ""
4837
4840
4841
+ # Check if this deployment has previously failed due to configuration errors
4842
+ _deployment_cache_key = f"failed_deployment_{ _deployment_model_id } "
4843
+ if self .failed_deployments .get_cache (_deployment_cache_key ) is not None :
4844
+ verbose_router_logger .debug (
4845
+ f"Skipping deployment { _deployment_model_id } - previously failed due to configuration errors"
4846
+ )
4847
+ return None
4848
+
4838
4849
_deployment_on_router : Optional [Deployment ] = self .get_deployment (
4839
4850
model_id = _deployment_model_id
4840
4851
)
@@ -4859,9 +4870,29 @@ def upsert_deployment(self, deployment: Deployment) -> Optional[Deployment]:
4859
4870
return deployment
4860
4871
except Exception as e :
4861
4872
if self .ignore_invalid_deployments :
4862
- verbose_router_logger .warning (
4863
- f"Error upserting deployment: { e } , ignoring and continuing with other deployments."
4873
+ # Check if this is a configuration error (missing credentials, etc.)
4874
+ _is_config_error = (
4875
+ "api_key is required" in str (e ) or
4876
+ "vertex_project, and vertex_location must be set" in str (e ) or
4877
+ "credentials" in str (e ).lower () or
4878
+ "authentication" in str (e ).lower ()
4864
4879
)
4880
+
4881
+ if _is_config_error :
4882
+ # Cache this deployment as failed to prevent repeated retries
4883
+ _deployment_cache_key = f"failed_deployment_{ deployment .model_info .id or '' } "
4884
+ self .failed_deployments .set_cache (
4885
+ key = _deployment_cache_key ,
4886
+ value = str (e ),
4887
+ ttl = 3600 # Cache for 1 hour
4888
+ )
4889
+ verbose_router_logger .warning (
4890
+ f"Error upserting deployment: { e } , caching as failed deployment to prevent retries."
4891
+ )
4892
+ else :
4893
+ verbose_router_logger .warning (
4894
+ f"Error upserting deployment: { e } , ignoring and continuing with other deployments."
4895
+ )
4865
4896
return None
4866
4897
else :
4867
4898
raise e
@@ -4917,6 +4948,26 @@ def get_deployment_credentials(self, model_id: str) -> Optional[dict]:
4917
4948
** deployment .litellm_params .model_dump (exclude_none = True )
4918
4949
).model_dump (exclude_none = True )
4919
4950
4951
def clear_failed_deployments_cache(self, model_id: Optional[str] = None) -> None:
    """
    Drop entries from the failed-deployments cache so they can be retried.

    Parameters:
    - model_id (Optional[str]): When given (any value other than None), only
      the entry stored for that model ID is deleted. When None, the whole
      failed-deployments cache is flushed.

    Returns:
    - None
    """
    # Guard clause: with no specific model requested, wipe every entry.
    if model_id is None:
        self.failed_deployments.flush_cache()
        verbose_router_logger.info("Cleared all failed deployment cache entries")
        return

    # The key text is reproduced exactly as it appears in the source so that
    # it matches the key used when the entry was written.
    # NOTE(review): the trailing space inside the key/log strings is kept
    # as-is - confirm whether it is intentional.
    self.failed_deployments.delete_cache(f"failed_deployment_{model_id} ")
    verbose_router_logger.info(
        f"Cleared failed deployment cache for model_id: {model_id} "
    )
4970
+
4920
4971
def get_deployment_by_model_group_name (
4921
4972
self , model_group_name : str
4922
4973
) -> Optional [Deployment ]:
0 commit comments