From 0d24c7fd9c793db5317c50ef1639b9e765e44bf9 Mon Sep 17 00:00:00 2001
From: Pankaj Telang
Date: Thu, 12 Dec 2024 13:11:58 +0000
Subject: [PATCH] Add copilot headers/auth for extracting package/ecosystem

---
 src/codegate/llm_utils/extractor.py                |  6 +++++-
 src/codegate/llm_utils/llmclient.py                |  4 ++++
 src/codegate/pipeline/base.py                      |  5 ++++-
 .../codegate_context_retriever/codegate.py         |  2 ++
 src/codegate/pipeline/secrets/secrets.py           |  4 ++++
 src/codegate/providers/copilot/pipeline.py         | 19 +++++++++++++++++--
 6 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/src/codegate/llm_utils/extractor.py b/src/codegate/llm_utils/extractor.py
index 272aa342..b4b2514e 100644
--- a/src/codegate/llm_utils/extractor.py
+++ b/src/codegate/llm_utils/extractor.py
@@ -1,4 +1,4 @@
-from typing import List, Optional
+from typing import Dict, List, Optional
 
 import structlog
 
@@ -24,6 +24,7 @@ async def extract_packages(
         model: str = None,
         base_url: Optional[str] = None,
         api_key: Optional[str] = None,
+        extra_headers: Optional[Dict[str, str]] = None,
     ) -> List[str]:
         """Extract package names from the given content."""
         system_prompt = Config.get_config().prompts.lookup_packages
@@ -35,6 +36,7 @@ async def extract_packages(
             model=model,
             api_key=api_key,
             base_url=base_url,
+            extra_headers=extra_headers,
         )
 
         # Handle both formats: {"packages": [...]} and direct list [...]
@@ -49,6 +51,7 @@ async def extract_ecosystem(
         model: str = None,
         base_url: Optional[str] = None,
         api_key: Optional[str] = None,
+        extra_headers: Optional[Dict[str, str]] = None,
     ) -> List[str]:
         """Extract ecosystem from the given content."""
         system_prompt = Config.get_config().prompts.lookup_ecosystem
@@ -60,6 +63,7 @@ async def extract_ecosystem(
             model=model,
             api_key=api_key,
             base_url=base_url,
+            extra_headers=extra_headers,
         )
 
         ecosystem = result if isinstance(result, str) else result.get("ecosystem")
diff --git a/src/codegate/llm_utils/llmclient.py b/src/codegate/llm_utils/llmclient.py
index f2b301c4..64c04f1a 100644
--- a/src/codegate/llm_utils/llmclient.py
+++ b/src/codegate/llm_utils/llmclient.py
@@ -27,6 +27,7 @@ async def complete(
         model: str = None,
         api_key: Optional[str] = None,
         base_url: Optional[str] = None,
+        extra_headers: Optional[Dict[str, str]] = None,
         **kwargs,
     ) -> Dict[str, Any]:
         """
@@ -53,6 +54,7 @@ async def complete(
             model,
             api_key,
             base_url,
+            extra_headers,
             **kwargs,
         )
 
@@ -102,6 +104,7 @@ async def _complete_litellm(
         model: str,
         api_key: str,
         base_url: Optional[str] = None,
+        extra_headers: Optional[Dict[str, str]] = None,
         **kwargs,
     ) -> Dict[str, Any]:
         # Use the private method to create the request
@@ -134,6 +137,7 @@ async def _complete_litellm(
                 temperature=request["temperature"],
                 base_url=base_url,
                 response_format=request["response_format"],
+                extra_headers=extra_headers,
             )
 
             content = response["choices"][0]["message"]["content"]
diff --git a/src/codegate/pipeline/base.py b/src/codegate/pipeline/base.py
index e6bbd5cd..f2f44e65 100644
--- a/src/codegate/pipeline/base.py
+++ b/src/codegate/pipeline/base.py
@@ -224,6 +224,7 @@ async def process_request(
         model: str,
         api_key: Optional[str] = None,
         api_base: Optional[str] = None,
+        extra_headers: Optional[Dict[str, str]] = None,
     ) -> PipelineResult:
         """Process a request through all pipeline steps"""
         self.context.sensitive = PipelineSensitiveData(
@@ -235,6 +236,7 @@ async def process_request(
             api_base=api_base,
         )
         self.context.metadata["prompt_id"] = prompt_id
+        self.context.metadata["extra_headers"] = extra_headers
 
         current_request = request
         for step in self.pipeline_steps:
@@ -271,9 +273,10 @@ async def process_request(
         model: str,
         api_key: Optional[str] = None,
         api_base: Optional[str] = None,
+        extra_headers: Optional[Dict[str, str]] = None,
     ) -> PipelineResult:
         """Create a new pipeline instance and process the request"""
         instance = self.create_instance()
         return await instance.process_request(
-            request, provider, prompt_id, model, api_key, api_base
+            request, provider, prompt_id, model, api_key, api_base, extra_headers
         )
diff --git a/src/codegate/pipeline/codegate_context_retriever/codegate.py b/src/codegate/pipeline/codegate_context_retriever/codegate.py
index ffb9bbfb..62780235 100644
--- a/src/codegate/pipeline/codegate_context_retriever/codegate.py
+++ b/src/codegate/pipeline/codegate_context_retriever/codegate.py
@@ -66,6 +66,7 @@ async def __lookup_packages(self, user_query: str, context: PipelineContext):
             model=context.sensitive.model,
             api_key=context.sensitive.api_key,
             base_url=context.sensitive.api_base,
+            extra_headers=context.metadata.get('extra_headers', None),
         )
 
         logger.info(f"Packages in user query: {packages}")
@@ -79,6 +80,7 @@ async def __lookup_ecosystem(self, user_query: str, context: PipelineContext):
             model=context.sensitive.model,
             api_key=context.sensitive.api_key,
             base_url=context.sensitive.api_base,
+            extra_headers=context.metadata.get('extra_headers', None),
         )
 
         logger.info(f"Ecosystem in user query: {ecosystem}")
diff --git a/src/codegate/pipeline/secrets/secrets.py b/src/codegate/pipeline/secrets/secrets.py
index 0b3b4890..a775f0f5 100644
--- a/src/codegate/pipeline/secrets/secrets.py
+++ b/src/codegate/pipeline/secrets/secrets.py
@@ -175,6 +175,10 @@ async def process(
         Returns:
             PipelineResult containing the processed request and context with redaction metadata
         """
+
+        if 'messages' not in request:
+            return PipelineResult(request=request, context=context)
+
         secrets_manager = context.sensitive.manager
         if not secrets_manager or not isinstance(secrets_manager, SecretsManager):
             raise ValueError("Secrets manager not found in context")
diff --git a/src/codegate/providers/copilot/pipeline.py b/src/codegate/providers/copilot/pipeline.py
index eba5ceb4..3a0c8dee 100644
--- a/src/codegate/providers/copilot/pipeline.py
+++ b/src/codegate/providers/copilot/pipeline.py
@@ -1,5 +1,6 @@
 import json
 from abc import ABC, abstractmethod
+from typing import Dict
 
 import structlog
 from litellm.types.llms.openai import ChatCompletionRequest
@@ -41,6 +42,18 @@ def _request_id(headers: list[str]) -> str:
         print("No request ID found in headers")
         return ""
 
+    @staticmethod
+    def _get_copilot_headers(headers: Dict[str, str]) -> Dict[str, str]:
+        copilot_header_names = ['copilot-integration-id', 'editor-plugin-version', 'editor-version',
+                                'openai-intent', 'openai-organization', 'user-agent',
+                                'vscode-machineid', 'vscode-sessionid', 'x-github-api-version',
+                                'x-request-id']
+        copilot_headers = {}
+        for a_name in copilot_header_names:
+            copilot_headers[a_name] = headers.get(a_name, '')
+
+        return copilot_headers
+
     async def process_body(self, headers: list[str], body: bytes) -> bytes:
         """Common processing logic for all strategies"""
         try:
@@ -51,8 +64,10 @@ async def process_body(self, headers: list[str], body: bytes) -> bytes:
                 request=normalized_body,
                 provider=self.provider_name,
                 prompt_id=self._request_id(headers),
-                model=normalized_body.get("model", ""),
-                api_key=None,
+                model=normalized_body.get("model", "gpt-4o-mini"),
+                api_key=headers.get('authorization', '').replace('Bearer ', ''),
+                api_base="https://" + headers.get('host', ''),
+                extra_headers=CopilotPipeline._get_copilot_headers(headers),
             )
 
             if result.request:
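
A minimal sketch of the header flow this patch introduces, for context. It assumes the proxy's raw header lines have been parsed into a dict before `process_body` uses them (the patch calls `headers.get(...)`, although `process_body` still types `headers` as `list[str]`); `headers_to_dict` and the sample values below are hypothetical illustrations, not part of the patch.

```python
from typing import Dict, List


def headers_to_dict(raw_headers: List[str]) -> Dict[str, str]:
    """Hypothetical helper: parse 'Name: value' lines into a lowercase-keyed dict."""
    parsed: Dict[str, str] = {}
    for line in raw_headers:
        name, sep, value = line.partition(":")
        if sep:
            parsed[name.strip().lower()] = value.strip()
    return parsed


# Sample Copilot proxy headers (values are made up)
raw = [
    "Authorization: Bearer ghu_sampletoken",
    "Host: api.githubcopilot.com",
    "Copilot-Integration-Id: vscode-chat",
    "X-Request-Id: 7f1a2b",
]
headers = headers_to_dict(raw)

# Mirrors what process_body now derives before calling process_request:
api_key = headers.get("authorization", "").replace("Bearer ", "")
api_base = "https://" + headers.get("host", "")

assert api_key == "ghu_sampletoken"
assert api_base == "https://api.githubcopilot.com"

# _get_copilot_headers(headers) would return the ten copilot-specific headers,
# defaulting missing ones to "", e.g.:
# {"copilot-integration-id": "vscode-chat", ..., "x-request-id": "7f1a2b"}
```

From there, per the diff, the dict travels as `extra_headers` into `process_request`, is stashed in `context.metadata["extra_headers"]`, is read back by the context retriever's package/ecosystem lookups, and is finally forwarded through `LLMClient.complete` to `litellm.acompletion(extra_headers=...)`, so the package/ecosystem extraction calls authenticate the same way as the original Copilot request.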