Skip to content

Commit 72518be

Browse files
authored
Merge pull request #7 from LemurPwned/feat/query-optim
Feat/query optim
2 parents e7a096b + a73879b commit 72518be

File tree

9 files changed

+101
-22
lines changed

9 files changed

+101
-22
lines changed

.github/workflows/release.yml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@ jobs:
7272
permissions:
7373
contents: read
7474
packages: write
75+
env:
76+
REGISTRY: ghcr.io/lemurpwned
77+
RELEASE_VERSION: ${{ needs.build.outputs.version }}
7578

7679
steps:
7780
- name: Checkout repository
@@ -88,8 +91,8 @@ jobs:
8891
uses: docker/build-push-action@v5
8992
with:
9093
context: .
94+
file: ./docker/Dockerfile
9195
push: true
9296
tags: |
93-
latest
94-
${{ needs.build.outputs.version }}
95-
97+
${{ env.REGISTRY }}/cypher-shell:latest
98+
${{ env.REGISTRY }}/cypher-shell:${{ env.RELEASE_VERSION }}

README.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ python -m cypher_shell --help
1717
or
1818

1919
```bash
20-
python -m cypher_shell --cfg-path configs/movies.yaml
20+
python -m cypher_shell run --cfg-path configs/movies.yaml
2121
```
2222

2323
where `configs/movies.yaml` is a configuration file that contains the node and relationship descriptions.
@@ -31,9 +31,13 @@ You need to set the `.env` file with your OpenAI API key and Neo4j credentials.
3131
You can also run the tool using Docker.
3232

3333
```bash
34-
docker run --env .env -it ghcr.io/lemurpwned/cypher-shell:latest python3 -m cypher_shell --cfg-path configs/movies.yaml
34+
docker run --env-file .env -it ghcr.io/lemurpwned/cypher-shell:latest python3 -m cypher_shell run --cfg-path configs/movies.yaml
3535
```
3636

3737
### Run query without LLM
3838

3939
Prefix the query with `cs:` and it will be run as-is, without being rewritten by the LLM.
40+
41+
## Notes:
42+
43+
- Sometimes letting the tool retrieve the schema automatically works better than providing it manually.

cypher_shell/__main__.py

Lines changed: 42 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
import logging
23
import os
34

@@ -16,6 +17,7 @@
1617
from rich.console import Console
1718

1819
from .agent import CypherFlowSimple
20+
from .optimizer import Optimizer
1921
from .query_runner import QueryRunner
2022
from .utils import get_logger
2123

@@ -38,13 +40,7 @@ def validate(self, document):
3840
raise ValidationError(message="Query cannot be empty", cursor_position=0)
3941

4042

41-
@app.command(help="Run a Cypher shell")
42-
def run(
43-
cfg_path: str | None = typer.Option(default=None, help="Path to the .yaml configuration file"),
44-
env_path: str | None = typer.Option(default=None, help="Path to the .env file"),
45-
debug: bool = typer.Option(default=False, help="Enable debug mode"),
46-
):
47-
load_dotenv(env_path, override=True)
43+
def load_cfg(cfg_path: str | None = None) -> dict:
4844
cfg = {}
4945
if cfg_path is None:
5046
console.print(
@@ -59,6 +55,45 @@ def run(
5955
assert (
6056
"node_descriptions" in cfg and "relationship_descriptions" in cfg
6157
), "Both node_descriptions and relationship_descriptions must be provided in the configuration file"
58+
return cfg
59+
60+
61+
def _load_slow_queries(log_path: str, min_timing: float) -> list[dict]:
    """Return logged queries slower than *min_timing*, slowest first.

    The log is read as JSON lines: one JSON object per line, each carrying a
    ``"timing"`` key.
    """
    slow_queries = []
    with open(log_path) as f:
        for line in f:
            # A blank line (e.g. a trailing newline) is not valid JSON; skip it
            # instead of letting json.loads raise.
            if not line.strip():
                continue
            # Parse each line exactly once.
            entry = json.loads(line)
            # Falsy payloads such as ``{}`` or ``null`` carry no query to optimize.
            if entry and entry["timing"] > min_timing:
                slow_queries.append(entry)
    slow_queries.sort(key=lambda x: x["timing"], reverse=True)
    return slow_queries


@app.command(help="Optimize a Cypher query. Based on the query logs.")
def optimize(
    log_path: str,
    cfg_path: str | None = typer.Option(default=None, help="Path to the .yaml configuration file"),
    env_path: str | None = typer.Option(default=None, help="Path to the .env file"),
    debug: bool = typer.Option(default=False, help="Enable debug mode"),
    min_timing: float = typer.Option(default=15.0, help="Minimum timing to consider for optimization"),
):
    """Print an optimization suggestion for every slow query found in a log file.

    Args:
        log_path: Path to the JSON-lines query log.
        cfg_path: Path to the .yaml configuration file, passed to ``load_cfg``.
        env_path: Path to the .env file, passed to ``load_dotenv``.
        debug: When true, switch the module logger to DEBUG level.
        min_timing: Only entries whose ``"timing"`` exceeds this value are optimized.
    """
    load_dotenv(env_path, override=True)
    cfg = load_cfg(cfg_path)
    # Honor --debug the same way the `run` command does.
    if debug:
        logger.setLevel(logging.DEBUG)
    optimizer = Optimizer(cfg)
    # Slowest queries are handled first.
    for query in _load_slow_queries(log_path, min_timing):
        logger.info(f"Optimizing query: {query}")
        resp = optimizer.optimize_query(query)
        console.print(resp)
86+
87+
88+
@app.command(help="Run a Cypher shell")
89+
def run(
90+
cfg_path: str | None = typer.Option(default=None, help="Path to the .yaml configuration file"),
91+
env_path: str | None = typer.Option(default=None, help="Path to the .env file"),
92+
debug: bool = typer.Option(default=False, help="Enable debug mode"),
93+
):
94+
load_dotenv(env_path, override=True)
95+
cfg = load_cfg(cfg_path)
96+
6297
if debug:
6398
logger.setLevel(logging.DEBUG)
6499
query_runner = QueryRunner(

cypher_shell/memory.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,9 @@ def add(self, message: MemoryMessage):
4949
self.memory.append(message)
5050

5151
def add_user_result(self, user_query: str, machine_query: str, result: str, timing: float = -1):
52-
self.memory.append(MemoryMessage(source="user", type="result", content=result))
53-
self.memory.append(MemoryMessage(source="user", type="query", content=user_query))
54-
self.memory.append(MemoryMessage(source="system", type="query", content=machine_query))
52+
self.add(MemoryMessage(source="user", type="result", content=result))
53+
self.add(MemoryMessage(source="user", type="query", content=user_query))
54+
self.add(MemoryMessage(source="system", type="query", content=machine_query))
5555
if self.track_user_queries:
5656
self.user_queries[user_query] = result
5757
if self.write_to_file:
@@ -62,7 +62,6 @@ def add_user_result(self, user_query: str, machine_query: str, result: str, timi
6262
"cypher_query": machine_query,
6363
"timing": timing,
6464
},
65-
indent=4,
6665
)
6766
)
6867

cypher_shell/optimizer.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from typing import Any
2+
3+
from rich.markdown import Markdown
4+
from swarm import Swarm
5+
6+
from .agent import Agent
7+
from .prompts.optim import OPTIMIZATION_PROMPT_GENERAL
8+
9+
10+
class Optimizer:
    """Asks an LLM agent for optimization suggestions on logged Cypher queries."""

    def __init__(self, cfg: dict):
        """Create the optimizer agent and the Swarm client.

        Args:
            cfg: Loaded configuration mapping. It is accepted from the caller
                but not read by this class.
        """
        self.optimizer_agent = Agent(
            name="Cypher Query Optimizer",
            model="gpt-4o-mini",
            temperature=0.0,
            instructions=OPTIMIZATION_PROMPT_GENERAL,
        )
        self.client = Swarm()

    def __call__(self, *args: Any, **kwds: Any) -> Any:
        """Placeholder: accepts any arguments, does nothing and returns ``None``."""
        return None

    def optimize_query(self, query: dict[str, Any]) -> "Markdown":
        """Send one logged query entry to the optimizer agent.

        Args:
            query: A log entry; it is interpolated as-is into the user message.

        Returns:
            The content of the last message of the agent's response, wrapped
            in a ``rich`` ``Markdown`` renderable (not a plain ``str``).
        """
        msg = self.client.run(
            agent=self.optimizer_agent,
            messages=[
                {
                    "role": "user",
                    # The entry is deliberately included twice in the prompt.
                    "content": f"Query: {query}. Reread the query carefully: {query}",
                }
            ],
        )
        return Markdown(msg.messages[-1]["content"])

cypher_shell/prompts/cypher.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ def _query_run(session: neo4j.Session, query: str):
5353

5454

5555
def get_nodes_schema(session: neo4j.Session):
56+
logger.info("Retrieving node schema")
5657
schema_query = """CALL db.schema.visualization()"""
5758
results = session.run(schema_query)
5859
data = results.data()
@@ -71,6 +72,7 @@ def node_and_rel_labels(session: neo4j.Session):
7172

7273

7374
def get_properties(session: neo4j.Session):
75+
logger.info("Retrieving node and relationship properties")
7476
node_results = session.run("CALL db.schema.nodeTypeProperties()")
7577
rel_results = session.run("CALL db.schema.relTypeProperties()")
7678
node_data = node_results.data()

cypher_shell/prompts/optim.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
OPTIMIZATION_PROMPT_GENERAL = """
2+
You're an expert at optimizing Cypher queries.
3+
4+
You're given a Cypher query and the user query.
5+
You need to optimize the query to be more efficient.
6+
7+
If you think adding an index will help, suggest it.
8+
"""

docker/Dockerfile

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,6 @@
11
FROM python:3.12-slim
22

33
WORKDIR /app
4-
RUN apt-get update && \
5-
apt-get install -y git && \
6-
apt-get clean && \
7-
rm -rf /var/lib/apt/lists/*
8-
94
COPY . .
105

116
RUN python3 -m pip install -e .

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
name = "cypher-shell"
33
description = "Cypher Shell -- a shell for querying Neo4j with Cypher"
44
url = "https://github.com/LemurPwned/cypher-shell"
5-
version = "0.3"
5+
version = "0.4"
66
authors = [
77
{ name = "LemurPwned", email = "[email protected]" }
88
]

0 commit comments

Comments
 (0)