This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Removed litellm from dependencies. #1300

Merged
1 commit merged on Mar 21, 2025
1,089 changes: 13 additions & 1,076 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 0 additions & 2 deletions pyproject.toml
@@ -14,7 +14,6 @@ PyYAML = "==6.0.2"
fastapi = "==0.115.11"
uvicorn = "==0.34.0"
structlog = "==25.2.0"
litellm = "==1.63.0"
llama_cpp_python = "==0.3.5"
cryptography = "==44.0.2"
sqlalchemy = "==2.0.39"
@@ -50,7 +49,6 @@ ruff = "==0.11.0"
bandit = "==1.8.3"
build = "==1.2.2.post1"
wheel = "==0.45.1"
litellm = "==1.63.0"
pytest-asyncio = "==0.25.3"
llama_cpp_python = "==0.3.5"
scikit-learn = "==1.6.1"
283 changes: 15 additions & 268 deletions src/codegate/muxing/adapter.py
@@ -1,23 +1,10 @@
import json
import uuid
from abc import ABC, abstractmethod
from typing import Callable, Dict, Union
from urllib.parse import urljoin

import structlog
from fastapi.responses import JSONResponse, StreamingResponse
from litellm import ModelResponse
from litellm.types.utils import Delta, StreamingChoices

from codegate.config import Config
from codegate.db import models as db_models
from codegate.muxing import rulematcher
from codegate.muxing.ollama_mappers import (
openai_chunk_from_ollama_chat,
openai_chunk_from_ollama_generate,
)
from codegate.types.ollama import StreamingChatCompletion as OllamaStreamingChatCompletion
from codegate.types.ollama import StreamingGenerateCompletion as OllamaStreamingGenerateCompletion

logger = structlog.get_logger("codegate")

@@ -35,260 +22,20 @@ def get_llamacpp_models_folder():
return override if override else "./codegate_volume/models"


class BodyAdapter:
"""
Format the body to the destination provider format.
def get_provider_formatted_url(model_route: rulematcher.ModelRoute) -> str:
"""Get the provider formatted URL to use in base_url. Note this value comes from DB"""
if model_route.endpoint.provider_type in [
db_models.ProviderType.openai,
db_models.ProviderType.vllm,
]:
return urljoin(model_route.endpoint.endpoint, "/v1")
if model_route.endpoint.provider_type == db_models.ProviderType.openrouter:
return urljoin(model_route.endpoint.endpoint, "/api/v1")
if model_route.endpoint.provider_type == db_models.ProviderType.llamacpp:
return get_llamacpp_models_folder()
return model_route.endpoint.endpoint

We expect the body to always be in OpenAI format. We need to configure the client
to send and expect OpenAI format. Here we just need to set the destination provider info.
"""

def _get_provider_formatted_url(self, model_route: rulematcher.ModelRoute) -> str:
"""Get the provider formatted URL to use in base_url. Note this value comes from DB"""
if model_route.endpoint.provider_type in [
db_models.ProviderType.openai,
db_models.ProviderType.vllm,
]:
return urljoin(model_route.endpoint.endpoint, "/v1")
if model_route.endpoint.provider_type == db_models.ProviderType.openrouter:
return urljoin(model_route.endpoint.endpoint, "/api/v1")
if model_route.endpoint.provider_type == db_models.ProviderType.llamacpp:
return get_llamacpp_models_folder()
return model_route.endpoint.endpoint

def get_destination_info(self, model_route: rulematcher.ModelRoute) -> dict:
"""Set the destination provider info."""
return model_route.model.name, self._get_provider_formatted_url(model_route)


class OutputFormatter(ABC):

@property
@abstractmethod
def provider_format_funcs(self) -> Dict[str, Callable]:
"""
Return the provider specific format functions. All providers format functions should
return the chunk in OpenAI format.
"""
pass

@abstractmethod
def format(
self, response: Union[StreamingResponse, JSONResponse], dest_prov: db_models.ProviderType
) -> Union[StreamingResponse, JSONResponse]:
"""Format the response to the client."""
pass


class StreamChunkFormatter(OutputFormatter):
"""
Format a single chunk from a stream to OpenAI format.
We need to configure the client to expect the OpenAI format.
In Continue this means setting "provider": "openai" in the config json file.
"""

@property
@abstractmethod
def provider_format_funcs(self) -> Dict[str, Callable]:
"""
Return the provider specific format functions. All providers format functions should
return the chunk in OpenAI format.
"""
pass

def _clean_chunk(self, chunk: str) -> str:
"""Clean the chunk from the "data:" and any extra characters."""
# Find the first position of 'data:' and add 5 characters to skip 'data:'
start_pos = chunk.find("data:") + 5
cleaned_chunk = chunk[start_pos:].strip()
return cleaned_chunk

def _format_openai(self, chunk: str) -> str:
"""
The chunk is already in OpenAI format. To standarize remove the "data:" prefix.

This function is used by both chat and FIM formatters
"""
return self._clean_chunk(chunk)

def _format_antropic(self, chunk: str) -> str:
"""
Format the Anthropic chunk to OpenAI format.

This function is used by both chat and FIM formatters
"""
cleaned_chunk = self._clean_chunk(chunk)
try:
# Use `strict=False` to allow the JSON payload to contain
# newlines, tabs and other valid characters that might
# come from Anthropic returning code.
chunk_dict = json.loads(cleaned_chunk, strict=False)
except Exception as e:
logger.warning(f"Error parsing Anthropic chunk: {chunk}. Error: {e}")
return cleaned_chunk.strip()

msg_type = chunk_dict.get("type", "")

finish_reason = None
if msg_type == "message_stop":
finish_reason = "stop"

# In type == "content_block_start" the content comes in "content_block"
# In type == "content_block_delta" the content comes in "delta"
msg_content_dict = chunk_dict.get("delta", {}) or chunk_dict.get("content_block", {})
# We couldn't obtain the content from the chunk. Skip it.
if not msg_content_dict:
return ""
msg_content = msg_content_dict.get("text", "")

open_ai_chunk = ModelResponse(
id=f"anthropic-chat-{str(uuid.uuid4())}",
model="anthropic-muxed-model",
object="chat.completion.chunk",
choices=[
StreamingChoices(
finish_reason=finish_reason,
index=0,
delta=Delta(content=msg_content, role="assistant"),
logprobs=None,
)
],
)

try:
return open_ai_chunk.model_dump_json(exclude_none=True, exclude_unset=True)
except Exception as e:
logger.warning(f"Error serializing Anthropic chunk: {chunk}. Error: {e}")
return cleaned_chunk.strip()

def _format_as_openai_chunk(self, formatted_chunk: str) -> str:
"""Format the chunk as OpenAI chunk. This is the format how the clients expect the data."""
chunk_to_send = f"data: {formatted_chunk}\n\n"
return chunk_to_send

async def _format_streaming_response(
self, response: StreamingResponse, dest_prov: db_models.ProviderType
):
"""Format the streaming response to OpenAI format."""
format_func = self.provider_format_funcs.get(dest_prov)
openai_chunk = None
try:
async for chunk in response.body_iterator:
openai_chunk = format_func(chunk)
# Sometimes for Anthropic we couldn't get content from the chunk. Skip it.
if not openai_chunk:
continue
yield self._format_as_openai_chunk(openai_chunk)
except Exception as e:
logger.error(f"Error sending chunk in muxing: {e}")
yield self._format_as_openai_chunk(str(e))
finally:
# Make sure the last chunk is always [DONE]
if openai_chunk and "[DONE]" not in openai_chunk:
yield self._format_as_openai_chunk("[DONE]")

def format(
self, response: StreamingResponse, dest_prov: db_models.ProviderType
) -> StreamingResponse:
"""Format the response to the client."""
return StreamingResponse(
self._format_streaming_response(response, dest_prov),
status_code=response.status_code,
headers=response.headers,
background=response.background,
media_type=response.media_type,
)


class ChatStreamChunkFormatter(StreamChunkFormatter):
"""
Format a single chunk from a stream to OpenAI format given that the request was a chat.
"""

@property
def provider_format_funcs(self) -> Dict[str, Callable]:
"""
Return the provider specific format functions. All providers format functions should
return the chunk in OpenAI format.
"""
return {
db_models.ProviderType.ollama: self._format_ollama,
db_models.ProviderType.openai: self._format_openai,
db_models.ProviderType.anthropic: self._format_antropic,
# Our Lllamacpp provider emits OpenAI chunks
db_models.ProviderType.llamacpp: self._format_openai,
# OpenRouter is a dialect of OpenAI
db_models.ProviderType.openrouter: self._format_openai,
# VLLM is a dialect of OpenAI
db_models.ProviderType.vllm: self._format_openai,
}

def _format_ollama(self, chunk: str) -> str:
"""Format the Ollama chunk to OpenAI format."""
try:
chunk_dict = json.loads(chunk)
ollama_chunk = OllamaStreamingChatCompletion.model_validate(chunk_dict)
open_ai_chunk = openai_chunk_from_ollama_chat(ollama_chunk)
return open_ai_chunk.model_dump_json(exclude_none=True, exclude_unset=True)
except Exception as e:
# Sometimes we receive an OpenAI formatted chunk from ollama. Specifically when
# talking to Cline or Kodu. If that's the case we use the format_openai function.
if "data:" in chunk:
return self._format_openai(chunk)
logger.warning(f"Error formatting Ollama chunk: {chunk}. Error: {e}")
return chunk


class FimStreamChunkFormatter(StreamChunkFormatter):

@property
def provider_format_funcs(self) -> Dict[str, Callable]:
"""
Return the provider specific format functions. All providers format functions should
return the chunk in OpenAI format.
"""
return {
db_models.ProviderType.ollama: self._format_ollama,
db_models.ProviderType.openai: self._format_openai,
# Our Lllamacpp provider emits OpenAI chunks
db_models.ProviderType.llamacpp: self._format_openai,
# OpenRouter is a dialect of OpenAI
db_models.ProviderType.openrouter: self._format_openai,
# VLLM is a dialect of OpenAI
db_models.ProviderType.vllm: self._format_openai,
db_models.ProviderType.anthropic: self._format_antropic,
}

def _format_ollama(self, chunk: str) -> str:
"""Format the Ollama chunk to OpenAI format."""
try:
chunk_dict = json.loads(chunk)
ollama_chunk = OllamaStreamingGenerateCompletion.model_validate(chunk_dict)
open_ai_chunk = openai_chunk_from_ollama_generate(ollama_chunk)
return open_ai_chunk.model_dump_json(exclude_none=True, exclude_unset=True)
except Exception as e:
print("Error formatting Ollama chunk: ", chunk, e)
return chunk


class ResponseAdapter:

def _get_formatter(
self, response: Union[StreamingResponse, JSONResponse], is_fim_request: bool
) -> OutputFormatter:
"""Get the formatter based on the request type."""
if isinstance(response, StreamingResponse):
if is_fim_request:
return FimStreamChunkFormatter()
return ChatStreamChunkFormatter()
raise MuxingAdapterError("Only streaming responses are supported.")

def format_response_to_client(
self,
response: Union[StreamingResponse, JSONResponse],
dest_prov: db_models.ProviderType,
is_fim_request: bool,
) -> Union[StreamingResponse, JSONResponse]:
"""Format the response to the client."""
stream_formatter = self._get_formatter(response, is_fim_request)
return stream_formatter.format(response, dest_prov)
def get_destination_info(model_route: rulematcher.ModelRoute) -> dict:
"""Set the destination provider info."""
return model_route.model.name, get_provider_formatted_url(model_route)
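
The replacement drops the BodyAdapter class in favour of module-level helpers. get_provider_formatted_url leans on urllib.parse.urljoin, where a path beginning with "/" replaces whatever path the stored endpoint already carries. A minimal sketch of that behaviour (the endpoint URLs below are illustrative, not taken from the PR):

from urllib.parse import urljoin

# A leading "/" path replaces the base URL's existing path, so endpoints stored
# with or without a trailing path normalize to the same provider base URL.
print(urljoin("https://api.openai.com", "/v1"))         # https://api.openai.com/v1
print(urljoin("https://openrouter.ai/api", "/api/v1"))  # https://openrouter.ai/api/v1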
6 changes: 2 additions & 4 deletions src/codegate/muxing/router.py
@@ -9,7 +9,7 @@
from codegate.db.models import ProviderType
from codegate.muxing import models as mux_models
from codegate.muxing import rulematcher
from codegate.muxing.adapter import BodyAdapter, ResponseAdapter
from codegate.muxing.adapter import get_destination_info
from codegate.providers.fim_analyzer import FIMAnalyzer
from codegate.providers.registry import ProviderRegistry
from codegate.types import anthropic, ollama, openai
@@ -39,11 +39,9 @@ class MuxRouter:

def __init__(self, provider_registry: ProviderRegistry):
self._ws_crud = WorkspaceCrud()
self._body_adapter = BodyAdapter()
self.router = APIRouter()
self._setup_routes()
self._provider_registry = provider_registry
self._response_adapter = ResponseAdapter()

@property
def route_name(self) -> str:
@@ -128,7 +126,7 @@ async def route_to_dest_provider(

# 2. Map the request body to the destination provider format.
rest_of_path = self._ensure_path_starts_with_slash(rest_of_path)
model, base_url = self._body_adapter.get_destination_info(model_route)
model, base_url = get_destination_info(model_route)

# 3. Run pipeline. Selecting the correct destination provider.
provider = self._provider_registry.get_provider(model_route.endpoint.provider_type)
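
With BodyAdapter and ResponseAdapter removed, MuxRouter calls the module-level get_destination_info directly. A minimal usage sketch, using SimpleNamespace stand-ins for the real ModelRoute and endpoint models (the model name and endpoint URL are hypothetical):

from types import SimpleNamespace

from codegate.db import models as db_models
from codegate.muxing.adapter import get_destination_info

# Stand-in route exposing only the attributes the helper reads:
# model.name, endpoint.endpoint and endpoint.provider_type.
route = SimpleNamespace(
    model=SimpleNamespace(name="gpt-4o-mini"),
    endpoint=SimpleNamespace(
        endpoint="https://api.openai.com",
        provider_type=db_models.ProviderType.openai,
    ),
)

model, base_url = get_destination_info(route)
assert model == "gpt-4o-mini"
assert base_url == "https://api.openai.com/v1"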