@@ -1,12 +1,12 @@
 import asyncio
-import json
 import re
 import ssl
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Tuple
 from urllib.parse import unquote, urljoin, urlparse
 
 import structlog
+from litellm.types.utils import Delta, ModelResponse, StreamingChoices
 
 from codegate.ca.codegate_ca import CertificateAuthority
 from codegate.config import Config
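The import swap above is the core of the change: instead of json.dumps-ing raw dicts onto the wire, each record is rebuilt as one of litellm's typed streaming objects and serialized through pydantic. A minimal standalone sketch of one such chunk, with placeholder id and model values (everything else mirrors the hunk below):

# Minimal sketch: build one streaming chunk from litellm's typed primitives.
# The id and model strings here are placeholders for illustration only.
from litellm.types.utils import Delta, ModelResponse, StreamingChoices

chunk = ModelResponse(
    id="chatcmpl-123",
    choices=[
        StreamingChoices(
            finish_reason=None,
            index=0,
            delta=Delta(content="Hello", role="assistant"),
            logprobs=None,
        )
    ],
    created=0,
    model="gpt-4",
    object="chat.completion.chunk",
)

# Same serialization call the new _process_stream uses below.
print(chunk.model_dump_json(exclude_none=True, exclude_unset=True))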
@@ -568,29 +568,51 @@ def connection_made(self, transport: asyncio.Transport) -> None:
 
     async def _process_stream(self):
         try:
+
             async def stream_iterator():
                 while True:
                     incoming_record = await self.stream_queue.get()
-                    yield incoming_record
-
-            async for record in stream_iterator():
-                print("received from stream")
-                print(record)
-                if record["type"] == "done":
-                    sse_data = b"data: [DONE]\n\n"
-                    # Add chunk size for DONE message too
-                    chunk_size = hex(len(sse_data))[2:] + "\r\n"
-                    self._proxy_transport_write(chunk_size.encode())
-                    self._proxy_transport_write(sse_data)
-                    self._proxy_transport_write(b"\r\n")
-                    # Now send the final zero chunk
-                    self._proxy_transport_write(b"0\r\n\r\n")
-                else:
-                    sse_data = f"data: {json.dumps(record['content'])}\n\n".encode("utf-8")
-                    chunk_size = hex(len(sse_data))[2:] + "\r\n"
-                    self._proxy_transport_write(chunk_size.encode())
-                    self._proxy_transport_write(sse_data)
-                    self._proxy_transport_write(b"\r\n")
+                    record_content = incoming_record.get("content", {})
+
+                    streaming_choices = []
+                    for choice in record_content.get("choices", []):
+                        streaming_choices.append(
+                            StreamingChoices(
+                                finish_reason=choice.get("finish_reason", None),
+                                index=0,
+                                delta=Delta(
+                                    content=choice.get("delta", {}).get("content"), role="assistant"
+                                ),
+                                logprobs=None,
+                            )
+                        )
+
+                    # Convert record to ModelResponse
+                    mr = ModelResponse(
+                        id=record_content.get("id", ""),
+                        choices=streaming_choices,
+                        created=record_content.get("created", 0),
+                        model=record_content.get("model", ""),
+                        object="chat.completion.chunk",
+                    )
+                    yield mr
+
+            async for record in self.output_pipeline_instance.process_stream(stream_iterator()):
+                chunk = record.model_dump_json(exclude_none=True, exclude_unset=True)
+                sse_data = f"data:{chunk}\n\n".encode("utf-8")
+                chunk_size = hex(len(sse_data))[2:] + "\r\n"
+                self._proxy_transport_write(chunk_size.encode())
+                self._proxy_transport_write(sse_data)
+                self._proxy_transport_write(b"\r\n")
+
+            sse_data = b"data: [DONE]\n\n"
+            # Add chunk size for DONE message too
+            chunk_size = hex(len(sse_data))[2:] + "\r\n"
+            self._proxy_transport_write(chunk_size.encode())
+            self._proxy_transport_write(sse_data)
+            self._proxy_transport_write(b"\r\n")
+            # Now send the final zero chunk
+            self._proxy_transport_write(b"0\r\n\r\n")
 
         except Exception as e:
             logger.error(f"Error processing stream: {e}")
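Both the old and the new version hand-roll HTTP chunked transfer encoding around each SSE event: a hex payload length followed by CRLF, the payload, a trailing CRLF, and finally a zero-length chunk ("0\r\n\r\n") to close the stream. A hypothetical frame_chunk helper, not part of the diff, that captures the framing the handler repeats for every event:

# Hypothetical helper (not in the codebase): wrap one SSE payload in an
# HTTP chunked transfer-encoding frame: "<hex length>\r\n<payload>\r\n".
def frame_chunk(payload: bytes) -> bytes:
    return hex(len(payload))[2:].encode() + b"\r\n" + payload + b"\r\n"

# "data: [DONE]\n\n" is 14 bytes, so the hex size line is "e".
assert frame_chunk(b"data: [DONE]\n\n") == b"e\r\ndata: [DONE]\n\n\r\n"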
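The other structural shift is that the handler no longer consumes stream_iterator() directly: the converted chunks are routed through self.output_pipeline_instance.process_stream(...), which the call site suggests is itself an async iterator over ModelResponse objects. A toy sketch of that wrapping pattern, using a pass-through stage over strings (the process_stream signature here is inferred from the call site, not confirmed against codegate's pipeline code):

import asyncio
from typing import AsyncIterator


async def process_stream(stream: AsyncIterator[str]) -> AsyncIterator[str]:
    # A pass-through pipeline stage: it can inspect, rewrite, or drop each
    # record before yielding it onward to whatever writes the transport.
    async for record in stream:
        yield record.upper()  # stand-in for real output-pipeline logic


async def main():
    async def stream_iterator():
        for word in ("hello", "world"):
            yield word

    # Mirrors the shape of the new code: iterate the pipeline's output,
    # not the raw iterator.
    async for record in process_stream(stream_iterator()):
        print(record)  # HELLO, WORLD


asyncio.run(main())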