From 0d24c7fd9c793db5317c50ef1639b9e765e44bf9 Mon Sep 17 00:00:00 2001
From: Pankaj Telang
Date: Thu, 12 Dec 2024 13:11:58 +0000
Subject: [PATCH] Add copilot headers/auth for extracting package/ecosystem

---
 src/codegate/llm_utils/extractor.py                |  6 +++++-
 src/codegate/llm_utils/llmclient.py                |  4 ++++
 src/codegate/pipeline/base.py                      |  5 ++++-
 .../codegate_context_retriever/codegate.py         |  2 ++
 src/codegate/pipeline/secrets/secrets.py           |  4 ++++
 src/codegate/providers/copilot/pipeline.py         | 19 +++++++++++++++++--
 6 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/src/codegate/llm_utils/extractor.py b/src/codegate/llm_utils/extractor.py
index 272aa342..b4b2514e 100644
--- a/src/codegate/llm_utils/extractor.py
+++ b/src/codegate/llm_utils/extractor.py
@@ -1,4 +1,4 @@
-from typing import List, Optional
+from typing import Dict, List, Optional
 
 import structlog
 
@@ -24,6 +24,7 @@ async def extract_packages(
         model: str = None,
         base_url: Optional[str] = None,
         api_key: Optional[str] = None,
+        extra_headers: Optional[Dict[str, str]] = None,
     ) -> List[str]:
         """Extract package names from the given content."""
         system_prompt = Config.get_config().prompts.lookup_packages
@@ -35,6 +36,7 @@ async def extract_packages(
             model=model,
             api_key=api_key,
             base_url=base_url,
+            extra_headers=extra_headers,
         )
 
         # Handle both formats: {"packages": [...]} and direct list [...]
@@ -49,6 +51,7 @@ async def extract_ecosystem(
         model: str = None,
         base_url: Optional[str] = None,
         api_key: Optional[str] = None,
+        extra_headers: Optional[Dict[str, str]] = None,
     ) -> List[str]:
         """Extract ecosystem from the given content."""
         system_prompt = Config.get_config().prompts.lookup_ecosystem
@@ -60,6 +63,7 @@ async def extract_ecosystem(
             model=model,
             api_key=api_key,
             base_url=base_url,
+            extra_headers=extra_headers,
         )
 
         ecosystem = result if isinstance(result, str) else result.get("ecosystem")
diff --git a/src/codegate/llm_utils/llmclient.py b/src/codegate/llm_utils/llmclient.py
index f2b301c4..64c04f1a 100644
--- a/src/codegate/llm_utils/llmclient.py
+++ b/src/codegate/llm_utils/llmclient.py
@@ -27,6 +27,7 @@ async def complete(
         model: str = None,
         api_key: Optional[str] = None,
         base_url: Optional[str] = None,
+        extra_headers: Optional[Dict[str, str]] = None,
         **kwargs,
     ) -> Dict[str, Any]:
         """
@@ -53,6 +54,7 @@ async def complete(
             model,
             api_key,
             base_url,
+            extra_headers,
             **kwargs,
         )
 
@@ -102,6 +104,7 @@ async def _complete_litellm(
         model: str,
         api_key: str,
         base_url: Optional[str] = None,
+        extra_headers: Optional[Dict[str, str]] = None,
         **kwargs,
     ) -> Dict[str, Any]:
         # Use the private method to create the request
@@ -134,6 +137,7 @@ async def _complete_litellm(
                 temperature=request["temperature"],
                 base_url=base_url,
                 response_format=request["response_format"],
+                extra_headers=extra_headers,
             )
 
             content = response["choices"][0]["message"]["content"]
diff --git a/src/codegate/pipeline/base.py b/src/codegate/pipeline/base.py
index e6bbd5cd..f2f44e65 100644
--- a/src/codegate/pipeline/base.py
+++ b/src/codegate/pipeline/base.py
@@ -224,6 +224,7 @@ async def process_request(
         model: str,
         api_key: Optional[str] = None,
         api_base: Optional[str] = None,
+        extra_headers: Optional[Dict[str, str]] = None,
     ) -> PipelineResult:
         """Process a request through all pipeline steps"""
         self.context.sensitive = PipelineSensitiveData(
@@ -235,6 +236,7 @@ async def process_request(
             api_base=api_base,
         )
         self.context.metadata["prompt_id"] = prompt_id
+        self.context.metadata["extra_headers"] = extra_headers
 
         current_request = request
         for step in self.pipeline_steps:
@@ -271,9 +273,10 @@ async def process_request(
         model: str,
         api_key: Optional[str] = None,
         api_base: Optional[str] = None,
+        extra_headers: Optional[Dict[str, str]] = None,
     ) -> PipelineResult:
         """Create a new pipeline instance and process the request"""
         instance = self.create_instance()
         return await instance.process_request(
-            request, provider, prompt_id, model, api_key, api_base
+            request, provider, prompt_id, model, api_key, api_base, extra_headers
         )
diff --git a/src/codegate/pipeline/codegate_context_retriever/codegate.py b/src/codegate/pipeline/codegate_context_retriever/codegate.py
index ffb9bbfb..62780235 100644
--- a/src/codegate/pipeline/codegate_context_retriever/codegate.py
+++ b/src/codegate/pipeline/codegate_context_retriever/codegate.py
@@ -66,6 +66,7 @@ async def __lookup_packages(self, user_query: str, context: PipelineContext):
             model=context.sensitive.model,
             api_key=context.sensitive.api_key,
             base_url=context.sensitive.api_base,
+            extra_headers=context.metadata.get('extra_headers', None),
         )
 
         logger.info(f"Packages in user query: {packages}")
@@ -79,6 +80,7 @@ async def __lookup_ecosystem(self, user_query: str, context: PipelineContext):
             model=context.sensitive.model,
             api_key=context.sensitive.api_key,
             base_url=context.sensitive.api_base,
+            extra_headers=context.metadata.get('extra_headers', None),
         )
 
         logger.info(f"Ecosystem in user query: {ecosystem}")
diff --git a/src/codegate/pipeline/secrets/secrets.py b/src/codegate/pipeline/secrets/secrets.py
index 0b3b4890..a775f0f5 100644
--- a/src/codegate/pipeline/secrets/secrets.py
+++ b/src/codegate/pipeline/secrets/secrets.py
@@ -175,6 +175,10 @@ async def process(
         Returns:
             PipelineResult containing the processed request and context with redaction metadata
         """
+
+        if 'messages' not in request:
+            return PipelineResult(request=request, context=context)
+
         secrets_manager = context.sensitive.manager
         if not secrets_manager or not isinstance(secrets_manager, SecretsManager):
             raise ValueError("Secrets manager not found in context")
diff --git a/src/codegate/providers/copilot/pipeline.py b/src/codegate/providers/copilot/pipeline.py
index eba5ceb4..3a0c8dee 100644
--- a/src/codegate/providers/copilot/pipeline.py
+++ b/src/codegate/providers/copilot/pipeline.py
@@ -1,5 +1,6 @@
 import json
 from abc import ABC, abstractmethod
+from typing import Dict
 
 import structlog
 from litellm.types.llms.openai import ChatCompletionRequest
@@ -41,6 +42,18 @@ def _request_id(headers: list[str]) -> str:
         print("No request ID found in headers")
         return ""
 
+    @staticmethod
+    def _get_copilot_headers(headers: Dict[str, str]) -> Dict[str, str]:
+        copilot_header_names = ['copilot-integration-id', 'editor-plugin-version', 'editor-version',
+                                'openai-intent', 'openai-organization', 'user-agent',
+                                'vscode-machineid', 'vscode-sessionid', 'x-github-api-version',
+                                'x-request-id']
+        copilot_headers = {}
+        for a_name in copilot_header_names:
+            copilot_headers[a_name] = headers.get(a_name, '')
+
+        return copilot_headers
+
     async def process_body(self, headers: list[str], body: bytes) -> bytes:
         """Common processing logic for all strategies"""
         try:
@@ -51,8 +64,10 @@ async def process_body(self, headers: list[str], body: bytes) -> bytes:
                 request=normalized_body,
                 provider=self.provider_name,
                 prompt_id=self._request_id(headers),
-                model=normalized_body.get("model", ""),
-                api_key=None,
+                model=normalized_body.get("model", "gpt-4o-mini"),
+                api_key=headers.get('authorization', '').replace('Bearer ', ''),
+                api_base="https://" + headers.get('host', ''),
+                extra_headers=CopilotPipeline._get_copilot_headers(headers),
             )
 
             if result.request:
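
A minimal sketch of the header flow this patch introduces, for context. It assumes the proxy's raw header lines have been parsed into a dict before `process_body` uses them (the patch calls `headers.get(...)`, although `process_body` still types `headers` as `list[str]`); `headers_to_dict` and the sample values below are hypothetical illustrations, not part of the patch.

```python
from typing import Dict, List


def headers_to_dict(raw_headers: List[str]) -> Dict[str, str]:
    """Hypothetical helper: parse 'Name: value' lines into a lowercase-keyed dict."""
    parsed: Dict[str, str] = {}
    for line in raw_headers:
        name, sep, value = line.partition(":")
        if sep:
            parsed[name.strip().lower()] = value.strip()
    return parsed


# Sample Copilot proxy headers (values are made up)
raw = [
    "Authorization: Bearer ghu_sampletoken",
    "Host: api.githubcopilot.com",
    "Copilot-Integration-Id: vscode-chat",
    "X-Request-Id: 7f1a2b",
]
headers = headers_to_dict(raw)

# Mirrors what process_body now derives before calling process_request:
api_key = headers.get("authorization", "").replace("Bearer ", "")
api_base = "https://" + headers.get("host", "")

assert api_key == "ghu_sampletoken"
assert api_base == "https://api.githubcopilot.com"

# _get_copilot_headers(headers) would return the ten copilot-specific headers,
# defaulting missing ones to "", e.g.:
# {"copilot-integration-id": "vscode-chat", ..., "x-request-id": "7f1a2b"}
```

From there, per the diff, the dict travels as `extra_headers` into `process_request`, is stashed in `context.metadata["extra_headers"]`, is read back by the context retriever's package/ecosystem lookups, and is finally forwarded through `LLMClient.complete` to `litellm.acompletion(extra_headers=...)`, so the package/ecosystem extraction calls authenticate the same way as the original Copilot request.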