Tune context and prompts #200
Changes from all commits
The first diff touches the `CodegateContextRetriever` pipeline step:
```diff
@@ -1,7 +1,10 @@
-from typing import Optional
+import json
 
-from litellm import ChatCompletionRequest, ChatCompletionSystemMessage
+import structlog
+from litellm import ChatCompletionRequest
 
+from codegate.config import Config
+from codegate.inference.inference_engine import LlamaCppInferenceEngine
 from codegate.pipeline.base import (
     PipelineContext,
     PipelineResult,
```
```diff
@@ -10,18 +13,18 @@
 from src.codegate.storage.storage_engine import StorageEngine
 from src.codegate.utils.utils import generate_vector_string
 
+logger = structlog.get_logger("codegate")
+
 
 class CodegateContextRetriever(PipelineStep):
     """
     Pipeline step that adds a context message to the completion request when it detects
     the word "codegate" in the user message.
     """
 
-    def __init__(self, system_prompt_message: Optional[str] = None):
-        self._system_message = ChatCompletionSystemMessage(
-            content=system_prompt_message, role="system"
-        )
+    def __init__(self):
+        self.storage_engine = StorageEngine()
+        self.inference_engine = LlamaCppInferenceEngine()
 
     @property
     def name(self) -> str:
```
```diff
@@ -30,8 +33,10 @@ def name(self) -> str:
         """
        return "codegate-context-retriever"
 
-    async def get_objects_from_search(self, search: str) -> list[object]:
-        objects = await self.storage_engine.search(search, distance=0.5)
+    async def get_objects_from_search(
+        self, search: str, packages: list[str] = None
+    ) -> list[object]:
+        objects = await self.storage_engine.search(search, distance=0.8, packages=packages)
         return objects
 
     def generate_context_str(self, objects: list[object]) -> str:
```
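The `StorageEngine.search` implementation is not part of this diff. Purely as an illustration of what the looser `distance=0.8` threshold and the new `packages` filter might translate to, here is a hypothetical sketch against the weaviate-client v4 API (the collection name, property name, and use of a wildcard `like` filter are assumptions, not the merged code). A wildcard `like` filter matches substrings, which would explain why `process` below re-checks exact package names:

```python
# Hypothetical sketch of a Weaviate v4 search with a distance cap and a
# package-name filter; StorageEngine's real internals are not shown in this PR.
import weaviate
from weaviate.classes.query import Filter, MetadataQuery


def search_packages(client: weaviate.WeaviateClient, query: str,
                    distance: float = 0.8, packages: list[str] = None):
    collection = client.collections.get("Package")  # assumed collection name
    filters = None
    if packages:
        # Wildcard `like` matches substrings ("invokehttp" also hits
        # "invokehttp2"), hence the exact-name post-filter in the caller.
        filters = Filter.any_of(
            [Filter.by_property("name").like(f"*{pkg}*") for pkg in packages]
        )
    response = collection.query.near_text(
        query=query,
        distance=distance,  # 0.8 admits weaker matches than the previous 0.5
        filters=filters,
        return_metadata=MetadataQuery(distance=True),
    )
    return response.objects
```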
```diff
@@ -48,49 +53,87 @@ def generate_context_str(self, objects: list[object]) -> str:
             context_str += package_str + "\n"
         return context_str
 
+    async def __lookup_packages(self, user_query: str):
+        ## Check which packages are referenced in the user query
+        request = {
+            "messages": [
+                {"role": "system", "content": Config.get_config().prompts.lookup_packages},
+                {"role": "user", "content": user_query},
+            ],
+            "model": "qwen2-1_5b-instruct-q5_k_m",
+            "stream": False,
+            "response_format": {"type": "json_object"},
+            "temperature": 0,
+        }
```
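With `response_format` set to `json_object` and temperature 0, the local model is steered to reply with a JSON object, and the code below reads only its `packages` key. An illustrative, made-up reply:

```python
import json

# Made-up reply from the lookup model; only the "packages" key is consumed.
reply = '{"packages": ["invokehttp"]}'
packages = json.loads(reply)["packages"]
print(packages)  # ['invokehttp']
```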
```diff
+        result = await self.inference_engine.chat(
+            f"{Config.get_config().model_base_path}/{request['model']}.gguf",
+            n_ctx=Config.get_config().chat_model_n_ctx,
+            n_gpu_layers=Config.get_config().chat_model_n_gpu_layers,
+            **request,
+        )
+
+        result = json.loads(result["choices"][0]["message"]["content"])
+        logger.info(f"Packages in user query: {result['packages']}")
+        return result["packages"]
```

Two review comments were left on the `inference_engine.chat` call:

> What do you think about wrapping this in a try/except, returning an empty list, and logging an error? In general our exception handling is not great (unrelated to this PR, of course), and I was wondering if it would make sense to mark pipeline steps as critical or nice-to-have and handle exceptions in the pipeline processor rather than having to handle them in the steps themselves. That would be outside the scope of this patch; for this one I just wonder about wrapping the chat in try/except and returning [] in case of an exception.

> Oh, and since we were talking on Slack about the performance of the local vs. remote model, just noting here that the local LLM takes anywhere between 1.5 and 4 seconds on my laptop. I will also measure the hosted LLMs for the same task.
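A minimal sketch of the guard suggested above, reusing the engine and config objects from the diff; this is the reviewer's proposed variant, not the merged code:

```python
# Sketch only: make package lookup best-effort, so a failed local-LLM call
# degrades to "no packages referenced" instead of breaking the pipeline step.
async def __lookup_packages(self, user_query: str) -> list[str]:
    request = ...  # built exactly as in the diff above
    try:
        result = await self.inference_engine.chat(
            f"{Config.get_config().model_base_path}/{request['model']}.gguf",
            n_ctx=Config.get_config().chat_model_n_ctx,
            n_gpu_layers=Config.get_config().chat_model_n_gpu_layers,
            **request,
        )
        parsed = json.loads(result["choices"][0]["message"]["content"])
        return parsed["packages"]
    except Exception:
        logger.exception("Package lookup failed; skipping context retrieval")
        return []
```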
```diff
     async def process(
         self, request: ChatCompletionRequest, context: PipelineContext
     ) -> PipelineResult:
         """
-        Process the completion request and add a system prompt if the user message contains
-        the word "codegate".
+        Use RAG DB to add context to the user request
         """
-        # no prompt configured
-        if not self._system_message["content"]:
-            return PipelineResult(request=request)
-
         # Get the last user message
         last_user_message = self.get_last_user_message(request)
 
-        if last_user_message is not None:
-            last_user_message_str, last_user_idx = last_user_message
-            if last_user_message_str.lower():
-                # Look for matches in vector DB
-                searched_objects = await self.get_objects_from_search(last_user_message_str)
-
-                # If matches are found, add the matched content to context
-                if len(searched_objects) > 0:
-                    context_str = self.generate_context_str(searched_objects)
-
-                    # Make a copy of the request
-                    new_request = request.copy()
-
-                    # Add the context to the last user message
-                    # Format: "Context: {context_str} \n Query: {last user message content}"
-                    # Handle the two cases: (a) message content is str, (b) message content
-                    # is list
-                    message = new_request["messages"][last_user_idx]
-                    if isinstance(message["content"], str):
-                        message["content"] = (
-                            f'Context: {context_str} \n\n Query: {message["content"]}'
-                        )
-                    elif isinstance(message["content"], (list, tuple)):
-                        for item in message["content"]:
-                            if isinstance(item, dict) and item.get("type") == "text":
-                                item["text"] = f'Context: {context_str} \n\n Query: {item["text"]}'
-
-                    return PipelineResult(
-                        request=new_request,
-                    )
+        # Nothing to do if the last user message is none
+        if last_user_message is None:
+            return PipelineResult(request=request)
+
+        # Extract packages from the user message
+        last_user_message_str, last_user_idx = last_user_message
+        packages = await self.__lookup_packages(last_user_message_str)
+
+        # If user message does not reference any packages, then just return
+        if len(packages) == 0:
+            return PipelineResult(request=request)
+
+        # Look for matches in vector DB using list of packages as filter
+        searched_objects = await self.get_objects_from_search(last_user_message_str, packages)
+
+        # If matches are found, add the matched content to context
+        if len(searched_objects) > 0:
+            # Remove searched objects that are not in packages. This is needed
+            # since Weaviate performs substring match in the filter.
+            updated_searched_objects = []
+            for searched_object in searched_objects:
+                if searched_object.properties["name"] in packages:
+                    updated_searched_objects.append(searched_object)
+            searched_objects = updated_searched_objects
+
+            # Generate context string using the searched objects
+            logger.info(f"Adding {len(searched_objects)} packages to the context")
+            context_str = self.generate_context_str(searched_objects)
+
+            # Make a copy of the request
+            new_request = request.copy()
+
+            # Add the context to the last user message
+            # Format: "Context: {context_str} \n Query: {last user message content}"
+            # Handle the two cases: (a) message content is str, (b) message content
+            # is list
+            message = new_request["messages"][last_user_idx]
+            if isinstance(message["content"], str):
+                message["content"] = f'Context: {context_str} \n\n Query: {message["content"]}'
+            elif isinstance(message["content"], (list, tuple)):
+                for item in message["content"]:
+                    if isinstance(item, dict) and item.get("type") == "text":
+                        item["text"] = f'Context: {context_str} \n\n Query: {item["text"]}'
+
+            return PipelineResult(
+                request=new_request,
+            )
+
+        # Fall through
+        return PipelineResult(request=request)
```
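A self-contained toy run of the two subtle parts of `process`: dropping the substring matches that Weaviate's filter lets through, and the `Context:`/`Query:` injection format. The package names and context line are invented for illustration:

```python
from dataclasses import dataclass, field


@dataclass
class SearchHit:
    """Stand-in for a Weaviate result object with a .properties dict."""
    properties: dict = field(default_factory=dict)


packages = ["invokehttp"]
hits = [SearchHit({"name": "invokehttp"}), SearchHit({"name": "invokehttp2"})]

# Weaviate's substring filter also returned "invokehttp2"; keep exact names only.
hits = [h for h in hits if h.properties["name"] in packages]
assert [h.properties["name"] for h in hits] == ["invokehttp"]

# The surviving context is prepended to the user's message.
context_str = "Package invokehttp is malicious\n"
query = "Should I use the invokehttp package?"
print(f"Context: {context_str} \n\n Query: {query}")
```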
This file was deleted.
This file was deleted.
The second diff adds what appears to be a package `__init__.py`:

```diff
@@ -0,0 +1,3 @@
+from codegate.pipeline.system_prompt.codegate import SystemPrompt
+
+__all__ = ["SystemPrompt"]
```
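Given the import path, this is presumably `codegate/pipeline/system_prompt/__init__.py`; the re-export lets callers import `SystemPrompt` from the package rather than the module:

```python
# Both imports resolve; the second is what the new __init__.py enables.
from codegate.pipeline.system_prompt.codegate import SystemPrompt  # module path
from codegate.pipeline.system_prompt import SystemPrompt  # package re-export
```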