Skip to content
This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Database schema and sqlc configs #136

Merged
merged 4 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,6 @@ htmlcov/

# Weaviate
weaviate_data/

# Codegate Dashboard DB
codegate.db
200 changes: 198 additions & 2 deletions poetry.lock

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@ transformers = ">=4.46.3"
structlog = ">=24.4.0"
litellm = "^1.52.16"
llama_cpp_python = ">=0.3.2"

cryptography = "^44.0.0"
sqlalchemy = "^2.0.28"
greenlet = "^3.0.3"
aiosqlite = "^0.19.0"

[tool.poetry.group.dev.dependencies]
pytest = ">=7.4.0"
pytest-cov = ">=4.1.0"
Expand Down
89 changes: 89 additions & 0 deletions sql/queries/queries.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
-- name: CreatePrompt :one
-- Insert a single prompt row and return the stored row.
INSERT INTO prompts (id, timestamp, provider, system_prompt, user_prompt, type)
VALUES (?, ?, ?, ?, ?, ?)
RETURNING *;

-- name: GetPrompt :one
-- Fetch one prompt by its primary key.
SELECT *
FROM prompts
WHERE id = ?;

-- name: ListPrompts :many
-- Page through prompts, newest first (parameters are LIMIT then OFFSET).
SELECT *
FROM prompts
ORDER BY timestamp DESC
LIMIT ?
OFFSET ?;

-- name: CreateOutput :one
-- Insert a single output row linked to a prompt and return the stored row.
INSERT INTO outputs (id, prompt_id, timestamp, output)
VALUES (?, ?, ?, ?)
RETURNING *;

-- name: GetOutput :one
-- Fetch one output by its primary key.
SELECT *
FROM outputs
WHERE id = ?;

-- name: GetOutputsByPromptId :many
-- All outputs recorded for one prompt, newest first.
SELECT *
FROM outputs
WHERE prompt_id = ?
ORDER BY timestamp DESC;

-- name: CreateAlert :one
-- Insert a single alert row linked to a prompt and an output, returning the stored row.
INSERT INTO alerts (
    id,
    prompt_id,
    output_id,
    code_snippet,
    trigger_string,
    trigger_type,
    trigger_category,
    timestamp
)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
RETURNING *;

-- name: GetAlert :one
-- Fetch one alert by its primary key.
SELECT *
FROM alerts
WHERE id = ?;

-- name: ListAlertsByPrompt :many
-- All alerts recorded for one prompt, newest first.
SELECT *
FROM alerts
WHERE prompt_id = ?
ORDER BY timestamp DESC;

-- name: GetSettings :one
-- Return the settings row with the lowest id.
SELECT *
FROM settings
ORDER BY id
LIMIT 1;

-- name: UpsertSettings :one
-- Insert a settings row, or overwrite every non-key column when the id already exists.
INSERT INTO settings (id, ip, port, llm_model, system_prompt, other_settings)
VALUES (?, ?, ?, ?, ?, ?)
ON CONFLICT (id) DO UPDATE SET
    ip             = excluded.ip,
    port           = excluded.port,
    llm_model      = excluded.llm_model,
    system_prompt  = excluded.system_prompt,
    other_settings = excluded.other_settings
RETURNING *;

-- name: GetPromptWithOutputsAndAlerts :many
-- One prompt together with its outputs and the alerts raised on each output.
-- Alerts are matched to the output they reference (alerts.output_id) in
-- addition to the prompt. Joining both child tables on the prompt id alone
-- would pair every alert with every output of the prompt (N outputs x M alerts
-- rows). A prompt without outputs still yields one row with NULL output and
-- alert columns, and an output without alerts yields NULL alert columns.
SELECT
    p.*,
    o.id AS output_id,
    o.output,
    a.id AS alert_id,
    a.code_snippet,
    a.trigger_string,
    a.trigger_type,
    a.trigger_category
FROM prompts AS p
LEFT JOIN outputs AS o
    ON o.prompt_id = p.id
LEFT JOIN alerts AS a
    ON a.prompt_id = p.id
    AND a.output_id = o.id
WHERE p.id = ?
ORDER BY o.timestamp DESC, a.timestamp DESC;
52 changes: 52 additions & 0 deletions sql/schema/schema.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
-- Schema for codegate database using SQLite

-- Prompts table
CREATE TABLE prompts (
    -- UUID stored as TEXT
    id            TEXT     PRIMARY KEY,
    timestamp     DATETIME NOT NULL,
    -- VARCHAR(255)
    provider      TEXT,
    system_prompt TEXT,
    user_prompt   TEXT     NOT NULL,
    -- VARCHAR(50) (e.g. "fim", "chat")
    type          TEXT     NOT NULL
);

-- Outputs table (each row belongs to one prompt)
CREATE TABLE outputs (
    -- UUID stored as TEXT
    id        TEXT     PRIMARY KEY,
    prompt_id TEXT     NOT NULL,
    timestamp DATETIME NOT NULL,
    output    TEXT     NOT NULL,

    FOREIGN KEY (prompt_id) REFERENCES prompts (id)
);

-- Alerts table (each row references one prompt and one output)
CREATE TABLE alerts (
    -- UUID stored as TEXT
    id               TEXT     PRIMARY KEY,
    prompt_id        TEXT     NOT NULL,
    output_id        TEXT     NOT NULL,
    -- VARCHAR(255)
    code_snippet     TEXT     NOT NULL,
    -- VARCHAR(255)
    trigger_string   TEXT     NOT NULL,
    -- VARCHAR(50)
    trigger_type     TEXT     NOT NULL,
    trigger_category TEXT,
    timestamp        DATETIME NOT NULL,

    FOREIGN KEY (prompt_id) REFERENCES prompts (id),
    FOREIGN KEY (output_id) REFERENCES outputs (id)
);

-- Settings table
CREATE TABLE settings (
    -- UUID stored as TEXT
    id             TEXT PRIMARY KEY,
    -- VARCHAR(45)
    ip             TEXT,
    port           INTEGER,
    -- VARCHAR(255)
    llm_model      TEXT,
    system_prompt  TEXT,
    -- JSON stored as TEXT
    other_settings TEXT
);

-- Indexes for foreign keys and frequently queried columns, grouped by table
-- prompts
CREATE INDEX idx_prompts_timestamp ON prompts (timestamp);
-- outputs
CREATE INDEX idx_outputs_prompt_id ON outputs (prompt_id);
CREATE INDEX idx_outputs_timestamp ON outputs (timestamp);
-- alerts
CREATE INDEX idx_alerts_prompt_id ON alerts (prompt_id);
CREATE INDEX idx_alerts_output_id ON alerts (output_id);
CREATE INDEX idx_alerts_timestamp ON alerts (timestamp);
20 changes: 20 additions & 0 deletions sqlc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# sqlc configuration: generate Python query code from the SQLite schema and
# query files under sql/.
version: "2"

# Code generator plugin, downloaded as a WASM module and pinned by checksum.
plugins:
  - name: "python"
    wasm:
      url: "https://downloads.sqlc.dev/plugin/sqlc-gen-python_1.2.0.wasm"
      sha256: "a6c5d174c407007c3717eea36ff0882744346e6ba991f92f71d6ab2895204c0e"

sql:
  - engine: "sqlite"
    schema: "sql/schema"
    queries: "sql/queries"
    codegen:
      - plugin: "python"
        # Generated modules are written into the codegate.db package.
        out: "src/codegate/db"
        options:
          package: "codegate.db"
          emit_sync_querier: true
          emit_async_querier: true
          query_parameter_limit: 5
          emit_pydantic_models: true
137 changes: 137 additions & 0 deletions src/codegate/db/connection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import asyncio
import datetime
import uuid
from pathlib import Path
from typing import Optional

import structlog
from litellm import ChatCompletionRequest
from sqlalchemy import create_engine, text
from sqlalchemy.ext.asyncio import create_async_engine

from codegate.db.models import Prompt

logger = structlog.get_logger("codegate")


class DbRecorder:
    """Persist prompts to the codegate SQLite database.

    Constructing an instance builds an async and a sync SQLAlchemy engine for
    the database file and, when that file does not exist yet, creates it from
    ``sql/schema/schema.sql``.
    """

    # Separator used when several messages of one role are flattened into a
    # single column value.
    _MESSAGE_SEPARATOR = "<|>"

    def __init__(self, sqlite_path: Optional[str] = None):
        """Set up the engines and create the database file if it is missing.

        Args:
            sqlite_path: Path to the SQLite file. When empty or ``None``,
                ``codegate.db`` three directories above this module is used.

        Raises:
            FileNotFoundError: If the database must be created but the schema
                file cannot be found.
            RuntimeError: Raised by ``asyncio.run`` if the database must be
                created while an event loop is already running in this thread.
        """
        # Initialize SQLite database engine with proper async URL
        if not sqlite_path:
            current_dir = Path(__file__).parent
            self._db_path = (current_dir.parent.parent.parent / "codegate.db").absolute()
        else:
            self._db_path = Path(sqlite_path).absolute()

        logger.debug(f"Initializing DB from path: {self._db_path}")
        engine_dict = {
            "url": f"sqlite+aiosqlite:///{self._db_path}",
            "echo": True,  # Set to False in production
            "isolation_level": "AUTOCOMMIT",  # Required for SQLite
        }
        self._async_db_engine = create_async_engine(**engine_dict)
        # NOTE(review): this synchronous engine is built from the same aiosqlite
        # URL as the async one, and nothing in this class uses it. Confirm a
        # sync engine can work with that driver before relying on it.
        self._db_engine = create_engine(**engine_dict)

        if not self.does_db_exist():
            logger.info(f"Database does not exist at {self._db_path}. Creating..")
            # asyncio.run() starts its own event loop, so this path only works
            # when the constructor is called from synchronous code.
            asyncio.run(self.init_db())

    def does_db_exist(self):
        """Return True when the database path points at an existing file."""
        return self._db_path.is_file()

    @staticmethod
    def _split_statements(schema: str) -> list:
        """Split a schema script into individual statements on ``;``.

        Chunks holding only whitespace or ``--`` comment lines are dropped so a
        trailing comment is never executed as a statement. The split is purely
        textual, so a ``;`` inside a comment or string literal still splits.
        """
        statements = []
        for chunk in schema.split(";"):
            chunk = chunk.strip()
            has_sql = any(
                line.strip() and not line.strip().startswith("--")
                for line in chunk.splitlines()
            )
            if has_sql:
                statements.append(chunk)
        return statements

    async def init_db(self):
        """Initialize the database with the schema.

        Does nothing when the database file already exists. Otherwise every
        statement of ``sql/schema/schema.sql`` is executed in order, and the
        async engine's pooled connections are released afterwards.

        Raises:
            FileNotFoundError: If the schema file does not exist.
        """
        if self.does_db_exist():
            logger.info("Database already exists. Skipping initialization.")
            return

        # Get the absolute path to the schema file
        current_dir = Path(__file__).parent
        schema_path = current_dir.parent.parent.parent / "sql" / "schema" / "schema.sql"

        if not schema_path.exists():
            raise FileNotFoundError(f"Schema file not found at {schema_path}")

        # Read the schema with an explicit encoding so the result does not
        # depend on the platform's default locale.
        with open(schema_path, "r", encoding="utf-8") as f:
            schema = f.read()

        try:
            async with self._async_db_engine.begin() as conn:
                for statement in self._split_statements(schema):
                    # Use SQLAlchemy text() to create executable SQL statements
                    await conn.execute(text(statement))
        finally:
            await self._async_db_engine.dispose()

    @staticmethod
    def _content_to_texts(content: object) -> list:
        """Return the text fragments carried by a message ``content`` value.

        ``None`` yields no fragments and a string yields itself. A list or
        tuple yields its string items plus the ``"text"`` value of dict items
        that have one; other items are ignored. Any other value is
        stringified.
        """
        if content is None:
            return []
        if isinstance(content, str):
            return [content]
        if isinstance(content, (list, tuple)):
            texts = []
            for part in content:
                if isinstance(part, str):
                    texts.append(part)
                elif isinstance(part, dict) and isinstance(part.get("text"), str):
                    texts.append(part["text"])
            return texts
        return [str(content)]

    @staticmethod
    def _extract_prompts(normalized_request) -> tuple:
        """Collect system and user prompt fragments from a request mapping.

        Returns:
            ``(system_prompt, user_prompt)`` as two lists of strings. When no
            user message carries text, a truthy top-level ``prompt`` field
            (used by non-chat completions) supplies the user prompt instead.
        """
        system_prompt = []
        user_prompt = []

        # ``or []`` also covers a "messages" key that is present but None.
        for msg in normalized_request.get("messages") or []:
            role = msg.get("role")
            if role == "system":
                system_prompt.extend(DbRecorder._content_to_texts(msg.get("content")))
            elif role == "user":
                user_prompt.extend(DbRecorder._content_to_texts(msg.get("content")))

        if not user_prompt:
            prompt = normalized_request.get("prompt")
            if prompt:
                user_prompt.extend(DbRecorder._content_to_texts(prompt))

        return system_prompt, user_prompt

    async def record_request(
        self, normalized_request: "ChatCompletionRequest", is_fim_request: bool, provider_str: str
    ) -> "Optional[Prompt]":
        """Store the prompts of a request as one row of the ``prompts`` table.

        Args:
            normalized_request: Request mapping with ``messages`` and/or
                ``prompt`` entries.
            is_fim_request: When true the row is stored with type ``"fim"``,
                otherwise ``"chat"``.
            provider_str: Value stored in the ``provider`` column.

        Returns:
            The inserted row as a ``Prompt``, or ``None`` when the request has
            no user prompt or the insert returned no row.
        """
        system_prompt, user_prompt = self._extract_prompts(normalized_request)

        if not user_prompt:
            logger.warning("No user prompt found in request.")
            return None

        # Create a new prompt record
        prompt_params = Prompt(
            id=str(uuid.uuid4()),  # Generate a new UUID for the prompt
            timestamp=datetime.datetime.now(datetime.timezone.utc),
            provider=provider_str,
            type="fim" if is_fim_request else "chat",
            user_prompt=self._MESSAGE_SEPARATOR.join(user_prompt),
            system_prompt=self._MESSAGE_SEPARATOR.join(system_prompt),
        )
        # There is a `create_prompt` method in queries.py automatically generated by sqlc.
        # However, the method is buggy and doesn't work as expected.
        # Manually writing the SQL query to insert the prompt record.
        async with self._async_db_engine.begin() as conn:
            sql = text(
                """
                INSERT INTO prompts (id, timestamp, provider, system_prompt, user_prompt, type)
                VALUES (:id, :timestamp, :provider, :system_prompt, :user_prompt, :type)
                RETURNING *
                """
            )
            result = await conn.execute(sql, prompt_params.model_dump())
            row = result.first()
            if row is None:
                return None

            return Prompt(
                id=row.id,
                timestamp=row.timestamp,
                provider=row.provider,
                system_prompt=row.system_prompt,
                user_prompt=row.user_prompt,
                type=row.type,
            )


def init_db_sync():
    """Create the database if it is missing.

    Constructing a ``DbRecorder`` performs the initialization when the
    database file does not exist; the instance itself is discarded.
    """
    DbRecorder()


# Running this module as a script initializes the database.
if __name__ == "__main__":
    init_db_sync()
42 changes: 42 additions & 0 deletions src/codegate/db/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Code generated by sqlc. DO NOT EDIT.
# versions:
# sqlc v1.27.0
from typing import Any, Optional

import pydantic


class Alert(pydantic.BaseModel):
    # Row of the ``alerts`` table.
    id: Any  # primary key; UUID stored as TEXT
    prompt_id: Any  # references prompts(id)
    output_id: Any  # references outputs(id)
    code_snippet: Any
    trigger_string: Any
    trigger_type: Any
    trigger_category: Optional[Any]  # nullable column
    timestamp: Any


class Output(pydantic.BaseModel):
    # Row of the ``outputs`` table.
    id: Any  # primary key; UUID stored as TEXT
    prompt_id: Any  # references prompts(id)
    timestamp: Any
    output: Any


class Prompt(pydantic.BaseModel):
    # Row of the ``prompts`` table.
    id: Any  # primary key; UUID stored as TEXT
    timestamp: Any
    provider: Optional[Any]  # nullable column
    system_prompt: Optional[Any]  # nullable column
    user_prompt: Any
    type: Any  # e.g. "fim" or "chat"


class Setting(pydantic.BaseModel):
    # Row of the ``settings`` table; every column except ``id`` is nullable.
    id: Any  # primary key; UUID stored as TEXT
    ip: Optional[Any]
    port: Optional[Any]  # INTEGER column
    llm_model: Optional[Any]
    system_prompt: Optional[Any]
    other_settings: Optional[Any]  # JSON stored as TEXT
Loading
Loading