Skip to content
This repository was archived by the owner on Jun 5, 2025. It is now read-only.

simplify signature initialization for optimization #354

Merged
merged 3 commits into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/codegate/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,16 +305,16 @@ def serve(
ca = CertificateAuthority.get_instance()
ca.ensure_certificates_exist()

# Set up event loop
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

# Initialize secrets manager and pipeline factory
secrets_manager = SecretsManager()
pipeline_factory = PipelineFactory(secrets_manager)

app = init_app(pipeline_factory)

# Set up event loop
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

# Run the server
try:
loop.run_until_complete(run_servers(cfg, app))
Expand Down
2 changes: 2 additions & 0 deletions src/codegate/pipeline/secrets/secrets.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ class CodegateSecrets(PipelineStep):
def __init__(self):
    """Set up the CodegateSecrets pipeline step and load the secret signatures."""
    super().__init__()
    # Load the signature definitions as soon as the step is constructed.
    # NOTE(review): "signatures.yaml" is a relative path, so it is looked up
    # relative to the process working directory - confirm that is intended.
    CodegateSignatures.initialize("signatures.yaml")

@property
def name(self) -> str:
Expand Down
38 changes: 12 additions & 26 deletions src/codegate/pipeline/secrets/signatures.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ class CodegateSignatures:
"""Main class for detecting secrets in text using regex patterns."""

_instance_lock: ClassVar[Lock] = Lock()
_signatures_loaded: ClassVar[bool] = False
_signature_groups: ClassVar[List[SignatureGroup]] = []
_compiled_regexes: ClassVar[Dict[str, re.Pattern]] = {}
_yaml_path: ClassVar[Optional[str]] = None
Expand All @@ -48,22 +47,23 @@ class CodegateSignatures:
def reset(cls) -> None:
    """Forget the configured YAML path and drop all cached signature data."""
    with cls._instance_lock:
        # Put every piece of class-level state back to its initial value.
        cls._yaml_path = None
        cls._compiled_regexes = {}
        cls._signature_groups = []
        cls._signatures_loaded = False
        logger.debug("SecretFinder cache reset")

@classmethod
def initialize(cls, yaml_path: str) -> None:
    """Initialize the SecretFinder with a YAML file path and load signatures.

    Calling this again with the path that is already loaded is a no-op, so
    several callers can initialize the class without re-parsing the file.

    Args:
        yaml_path: Path to the YAML file holding the signature definitions.

    Raises:
        FileNotFoundError: If ``yaml_path`` does not exist.
        Exception: Any error raised by ``_load_signatures``; the class is
            left uninitialized in that case so a later call can retry.
    """
    if not Path(yaml_path).exists():
        raise FileNotFoundError(f"Signatures file not found: {yaml_path}")

    with cls._instance_lock:
        # Compare BEFORE storing the new path: assigning first would make
        # the check always false and the signatures would never be loaded.
        if cls._yaml_path == yaml_path and cls._signatures_loaded:
            return

        # _load_signatures reads cls._yaml_path, so publish it first.
        cls._yaml_path = yaml_path
        cls._signatures_loaded = False
        try:
            cls._load_signatures()
        except Exception:
            # Do not keep a path whose load failed; otherwise a retry with
            # the same path would be skipped as "already initialized".
            cls._yaml_path = None
            raise
        cls._signatures_loaded = True
        logger.debug(f"SecretFinder initialized with {yaml_path}")

@classmethod
def _preprocess_yaml(cls, content: str) -> str:
Expand Down Expand Up @@ -172,6 +172,10 @@ def _add_signature_group(cls, name: str, patterns: Dict[str, str]) -> None:
def _load_signatures(cls) -> None:
"""Load signature patterns from the YAML file."""
try:
# Clear existing signatures before loading new ones
cls._signature_groups = []
cls._compiled_regexes = {}

yaml_data = cls._load_yaml(cls._yaml_path)

# Add custom GitHub token patterns
Expand Down Expand Up @@ -205,32 +209,14 @@ def _load_signatures(cls) -> None:
logger.error(f"Error loading signatures: {e}")
raise

@classmethod
def _ensure_signatures_loaded(cls) -> None:
    """Load the signatures on first use; do nothing once they are loaded.

    Raises:
        RuntimeError: If no YAML path has been configured yet.
        Exception: Any error from ``_load_signatures``, logged and re-raised.
    """
    # Unlocked check first so the already-loaded case never takes the lock.
    if cls._signatures_loaded:
        return

    with cls._instance_lock:
        # Check again under the lock: the flag may have been set while waiting.
        if cls._signatures_loaded:
            return
        if not cls._yaml_path:
            raise RuntimeError("SecretFinder not initialized. Call initialize() first.")
        try:
            cls._load_signatures()
            cls._signatures_loaded = True
        except Exception as load_error:
            logger.error(f"Failed to load signatures: {load_error}")
            raise

@classmethod
def find_in_string(cls, text: str) -> List[Match]:
"""Search for secrets in the provided string."""
if not text:
return []

try:
cls._ensure_signatures_loaded()
except Exception as e:
logger.error(f"Failed to load signatures: {e}")
return []
if not cls._yaml_path:
raise RuntimeError("SecretFinder not initialized.")

matches = []
lines = text.splitlines()
Expand Down
4 changes: 0 additions & 4 deletions src/codegate/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from codegate import __description__, __version__
from codegate.dashboard.dashboard import dashboard_router
from codegate.pipeline.factory import PipelineFactory
from codegate.pipeline.secrets.signatures import CodegateSignatures
from codegate.providers.anthropic.provider import AnthropicProvider
from codegate.providers.llamacpp.provider import LlamaCppProvider
from codegate.providers.ollama.provider import OllamaProvider
Expand All @@ -31,9 +30,6 @@ def init_app(pipeline_factory: PipelineFactory) -> FastAPI:
# Create provider registry
registry = ProviderRegistry(app)

# Initialize SignaturesFinder
CodegateSignatures.initialize("signatures.yaml")

# Register all known providers
registry.add_provider(
"openai",
Expand Down
5 changes: 0 additions & 5 deletions tests/pipeline/secrets/test_signatures.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,12 @@ def test_initialize_and_reset(temp_yaml_file):

CodegateSignatures.reset()
assert CodegateSignatures._yaml_path is None
assert not CodegateSignatures._signatures_loaded
assert not CodegateSignatures._signature_groups
assert not CodegateSignatures._compiled_regexes


def test_find_in_string_with_aws_credentials(temp_yaml_file):
CodegateSignatures.initialize(temp_yaml_file)
CodegateSignatures._signatures_loaded = False # Force reload of signatures

test_string = """
aws_access_key = 'AKIAIOSFODNN7EXAMPLE'
Expand All @@ -100,7 +98,6 @@ def test_find_in_string_with_github_token():

try:
CodegateSignatures.initialize(f.name)
CodegateSignatures._signatures_loaded = False # Force reload of signatures

test_string = "github_token = 'ghp_1234567890abcdef1234567890abcdef123456'"
matches = CodegateSignatures.find_in_string(test_string)
Expand All @@ -121,7 +118,6 @@ def test_find_in_string_with_github_token():

def test_find_in_string_with_no_matches(temp_yaml_file):
CodegateSignatures.initialize(temp_yaml_file)
CodegateSignatures._signatures_loaded = False # Force reload of signatures

test_string = "No secrets here!"
matches = CodegateSignatures.find_in_string(test_string)
Expand Down Expand Up @@ -158,7 +154,6 @@ def test_duplicate_patterns():

try:
CodegateSignatures.initialize(f.name)
CodegateSignatures._signatures_loaded = False # Force reload of signatures

test_string = "aws_key = 'AKIAIOSFODNN7EXAMPLE'"
matches = CodegateSignatures.find_in_string(test_string)
Expand Down
7 changes: 0 additions & 7 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,6 @@ def test_provider_registration(mock_registry, mock_secrets_mgr, mock_pipeline_fa
assert "ollama" in provider_names


@patch("codegate.server.CodegateSignatures")
def test_signatures_initialization(mock_signatures, mock_pipeline_factory) -> None:
    """Check that building the app initializes the signatures exactly once."""
    init_app(mock_pipeline_factory)
    # The patched class records the call; it must receive the default file name.
    initialize_mock = mock_signatures.initialize
    initialize_mock.assert_called_once_with("signatures.yaml")


def test_pipeline_initialization(mock_pipeline_factory) -> None:
"""Test that pipelines are initialized correctly."""
app = init_app(mock_pipeline_factory)
Expand Down
7 changes: 0 additions & 7 deletions tests/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,6 @@ def test_provider_registration(mock_registry, mock_secrets_mgr, mock_pipeline_fa
assert "ollama" in provider_names


@patch("codegate.server.CodegateSignatures")
def test_signatures_initialization(mock_signatures, mock_pipeline_factory) -> None:
    """Check that building the app initializes the signatures exactly once."""
    init_app(mock_pipeline_factory)
    # The patched class records the call; it must receive the default file name.
    initialize_mock = mock_signatures.initialize
    initialize_mock.assert_called_once_with("signatures.yaml")


def test_pipeline_initialization(mock_pipeline_factory) -> None:
"""Test that pipelines are initialized correctly."""
app = init_app(mock_pipeline_factory)
Expand Down
Loading