Skip to content
This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Commit 0e586db

Browse files
Merge pull request #215 from jhrozek/output_snippets
Warn about malicious, deprecated or archived packages in output snippets
2 parents d15ba88 + e61d782 commit 0e586db

File tree

5 files changed

+68
-13
lines changed

5 files changed

+68
-13
lines changed

src/codegate/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from codegate.config import Config, ConfigurationError
1212
from codegate.db.connection import init_db_sync
1313
from codegate.server import init_app
14-
from src.codegate.storage.utils import restore_storage_backup
14+
from codegate.storage.utils import restore_storage_backup
1515

1616

1717
def validate_port(ctx: click.Context, param: click.Parameter, value: int) -> int:

src/codegate/llm_utils/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from src.codegate.llm_utils.extractor import PackageExtractor
2-
from src.codegate.llm_utils.llmclient import LLMClient
1+
from codegate.llm_utils.extractor import PackageExtractor
2+
from codegate.llm_utils.llmclient import LLMClient
33

44
__all__ = ["LLMClient", "PackageExtractor"]

src/codegate/llm_utils/extractor.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from codegate.config import Config
66
from codegate.llm_utils.llmclient import LLMClient
7+
from codegate.storage import StorageEngine
78

89
logger = structlog.get_logger("codegate")
910

@@ -13,6 +14,9 @@ class PackageExtractor:
1314
Utility class to extract package names from code or queries.
1415
"""
1516

17+
def __init__(self):
18+
self.storage_engine = StorageEngine()
19+
1620
@staticmethod
1721
async def extract_packages(
1822
content: str,

src/codegate/pipeline/extract_snippets/output.py

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from codegate.pipeline.base import CodeSnippet, PipelineContext, PipelineSensitiveData
99
from codegate.pipeline.extract_snippets.extract_snippets import extract_snippets
1010
from codegate.pipeline.output import OutputPipelineContext, OutputPipelineStep
11+
from codegate.storage import StorageEngine
1112

1213
logger = structlog.get_logger("codegate")
1314

@@ -16,7 +17,7 @@ class CodeCommentStep(OutputPipelineStep):
1617
"""Pipeline step that adds comments after code blocks"""
1718

1819
def __init__(self):
19-
pass
20+
self._storage_engine = StorageEngine()
2021

2122
@property
2223
def name(self) -> str:
@@ -41,8 +42,7 @@ def _create_chunk(self, original_chunk: ModelResponse, content: str) -> ModelRes
4142
object="chat.completion.chunk",
4243
)
4344

44-
@staticmethod
45-
async def _snippet_comment(snippet: CodeSnippet, secrets: PipelineSensitiveData) -> str:
45+
async def _snippet_comment(self, snippet: CodeSnippet, secrets: PipelineSensitiveData) -> str:
4646
"""Create a comment for a snippet"""
4747
snippet.libraries = await PackageExtractor.extract_packages(
4848
content=snippet.code,
@@ -52,14 +52,33 @@ async def _snippet_comment(snippet: CodeSnippet, secrets: PipelineSensitiveData)
5252
base_url=secrets.api_base,
5353
)
5454

55+
libobjects = await self._storage_engine.search_by_property("name", snippet.libraries)
56+
logger.info(f"Found {len(libobjects)} libraries in the storage engine")
57+
5558
libraries_text = ""
59+
warnings = []
60+
61+
# Use snippet.libraries to generate a CSV list of libraries
5662
if snippet.libraries:
57-
libraries_text = " ".join(f"`{lib}`" for lib in snippet.libraries)
58-
if libraries_text:
59-
comment = f"\nThe above code snippet uses the following libraries: {libraries_text}\n"
60-
else:
61-
comment = "\ncodegate didn't detect any libraries in the snippet\n"
62-
comment += "\n"
63+
libraries_text = ", ".join([f"`{lib}`" for lib in snippet.libraries])
64+
65+
for lib in libobjects:
66+
lib_name = lib.properties["name"]
67+
lib_status = lib.properties["status"]
68+
lib_url = f"https://www.insight.stacklok.com/report/{lib.properties['type']}/{lib_name}"
69+
70+
warnings.append(
71+
f"- The package `{lib_name}` is marked as **{lib_status}**.\n"
72+
f"- More information: [{lib_url}]({lib_url})\n"
73+
)
74+
75+
comment = ""
76+
if libraries_text != "":
77+
comment += f"\n\nCodegate detected the following libraries: {libraries_text}\n"
78+
79+
if warnings:
80+
comment += "\n### 🚨 Warnings\n" + "\n".join(warnings) + "\n"
81+
6382
return comment
6483

6584
def _split_chunk_at_code_end(self, content: str) -> tuple[str, str]:

src/codegate/storage/storage_engine.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1+
from typing import List
2+
13
import structlog
24
import weaviate
35
import weaviate.classes as wvc
46
from weaviate.classes.config import DataType
5-
from weaviate.classes.query import MetadataQuery
7+
from weaviate.classes.query import Filter, MetadataQuery
68
from weaviate.embedded import EmbeddedOptions
79

810
from codegate.config import Config
@@ -88,6 +90,36 @@ def setup_schema(self, client):
8890
)
8991
logger.info(f"Weaviate schema for class {class_config['name']} setup complete.")
9092

93+
async def search_by_property(self, name: str, properties: List[str]) -> list[object]:
94+
if len(properties) == 0:
95+
return []
96+
97+
# Perform the vector search
98+
weaviate_client = self.get_client(self.data_path)
99+
if weaviate_client is None:
100+
logger.error("Could not find client, not returning results.")
101+
return []
102+
103+
if not weaviate_client:
104+
logger.error("Invalid client, cannot perform search.")
105+
return []
106+
107+
try:
108+
weaviate_client.connect()
109+
packages = weaviate_client.collections.get("Package")
110+
response = packages.query.fetch_objects(
111+
filters=Filter.by_property(name).contains_any(properties),
112+
)
113+
114+
if not response:
115+
return []
116+
return response.objects
117+
except Exception as e:
118+
logger.error(f"An error occurred: {str(e)}")
119+
return []
120+
finally:
121+
weaviate_client.close()
122+
91123
async def search(self, query: str, limit=5, distance=0.3, packages=None) -> list[object]:
92124
"""
93125
Search the 'Package' collection based on a query string.

0 commit comments

Comments
 (0)