From c9c4b9f21b3bec28d4b76cf7e31dd2dffd7f9b43 Mon Sep 17 00:00:00 2001 From: Adam Schill Collberg Date: Fri, 23 May 2025 09:45:36 +0200 Subject: [PATCH 1/3] Allow `Driver` as argument to `from_neo4j` --- changelog.md | 2 ++ docs/source/integration.rst | 9 +++-- python-wrapper/src/neo4j_viz/neo4j.py | 31 +++++++++++------ python-wrapper/tests/test_neo4j.py | 50 ++++++++++++++++++++++++++- 4 files changed, 78 insertions(+), 14 deletions(-) diff --git a/changelog.md b/changelog.md index 9940053..54e3a0b 100644 --- a/changelog.md +++ b/changelog.md @@ -8,6 +8,8 @@ ## New features +* Allow passing a `neo4j.Driver` instance as input to `from_neo4j`, in which case the driver will be used internally to fetch the graph data using a simple query + ## Bug fixes diff --git a/docs/source/integration.rst b/docs/source/integration.rst index d99091d..c4ee07b 100644 --- a/docs/source/integration.rst +++ b/docs/source/integration.rst @@ -184,13 +184,16 @@ Once you have installed the additional dependency, you can use the :doc:`from_ne to import query results from Neo4j. The ``from_neo4j`` method takes one mandatory positional parameter: - -* A ``result`` representing the query result either in form of `neo4j.graph.Graph` or `neo4j.Result`. +A ``data`` argument representing either a query result in the shape of a ``neo4j.graph.Graph`` or ``neo4j.Result``, or a +``neo4j.Driver`` in which case a simple default query will be executed internally to retrieve the graph data. We can also provide an optional ``size_property`` parameter, which should refer to a node property, and will be used to determine the sizes of the nodes in the visualization. -The ``node_caption`` and ``relationship_caption`` parameters are also optional, and indicate the node and relationship properties to use for the captions of each element in the visualization. 
+The ``node_caption`` and ``relationship_caption`` parameters are also optional, and indicate the node and relationship +properties to use for the captions of each element in the visualization. +By default, the captions will be set to the node labels and relationship types, but you can specify any property that +exists on these entities. The last optional property, ``node_radius_min_max``, can be used (and is used by default) to scale the node sizes for the visualization. diff --git a/python-wrapper/src/neo4j_viz/neo4j.py b/python-wrapper/src/neo4j_viz/neo4j.py index d5eeb40..4473d33 100644 --- a/python-wrapper/src/neo4j_viz/neo4j.py +++ b/python-wrapper/src/neo4j_viz/neo4j.py @@ -3,7 +3,7 @@ from typing import Optional, Union import neo4j.graph -from neo4j import Result +from neo4j import Driver, Result, RoutingControl from pydantic import BaseModel, ValidationError from neo4j_viz.node import Node @@ -20,14 +20,15 @@ def _parse_validation_error(e: ValidationError, entity_type: type[BaseModel]) -> def from_neo4j( - result: Union[neo4j.graph.Graph, Result], + data: Union[neo4j.graph.Graph, Result, Driver], size_property: Optional[str] = None, node_caption: Optional[str] = "labels", relationship_caption: Optional[str] = "type", node_radius_min_max: Optional[tuple[float, float]] = (3, 60), + row_limit: int = 10_000, ) -> VisualizationGraph: """ - Create a VisualizationGraph from a Neo4j Graph or Neo4j Result object. + Create a VisualizationGraph from a Neo4j `Graph`, Neo4j `Result` or Neo4j `Driver`. All node and relationship properties will be included in the visualization graph. If the properties are named as the fields of the `Node` or `Relationship` classes, they will be included as @@ -36,8 +37,9 @@ def from_neo4j( Parameters ---------- - result : Union[neo4j.graph.Graph, Result] - Query result either in shape of a Graph or result. 
+ data : Union[neo4j.graph.Graph, neo4j.Result, neo4j.Driver] + Either a query result in the shape of a `neo4j.graph.Graph` or `neo4j.Result`, or a `neo4j.Driver` in + which case a simple default query will be executed internally to retrieve the graph data. size_property : str, optional Property to use for node size, by default None. node_caption : str, optional @@ -47,14 +49,23 @@ def from_neo4j( node_radius_min_max : tuple[float, float], optional Minimum and maximum node radius, by default (3, 60). To avoid tiny or huge nodes in the visualization, the node sizes are scaled to fit in the given range. + row_limit : int, optional + Maximum number of rows to return from the query, by default 10_000. + This is only used if a `neo4j.Driver` is passed as the `data` argument, otherwise the limit is ignored. """ - if isinstance(result, Result): - graph = result.graph() - elif isinstance(result, neo4j.graph.Graph): - graph = result + if isinstance(data, Result): + graph = data.graph() + elif isinstance(data, neo4j.graph.Graph): + graph = data + elif isinstance(data, Driver): + graph = data.execute_query( + f"MATCH (n)-[r]->(m) RETURN n,r,m LIMIT {row_limit}", + routing_=RoutingControl.READ, + result_transformer_=Result.graph, + ) else: - raise ValueError(f"Invalid input type `{type(result)}`. Expected `neo4j.Graph` or `neo4j.Result`") + raise ValueError(f"Invalid input type `{type(data)}`. 
Expected `neo4j.Graph`, `neo4j.Result` or `neo4j.Driver`") all_node_field_aliases = Node.all_validation_aliases() all_rel_field_aliases = Relationship.all_validation_aliases() diff --git a/python-wrapper/tests/test_neo4j.py b/python-wrapper/tests/test_neo4j.py index 779e36c..3b38fb1 100644 --- a/python-wrapper/tests/test_neo4j.py +++ b/python-wrapper/tests/test_neo4j.py @@ -2,7 +2,7 @@ import neo4j import pytest -from neo4j import Session +from neo4j import Driver, Session from neo4j_viz.neo4j import from_neo4j from neo4j_viz.node import Node @@ -201,3 +201,51 @@ def test_from_neo4j_rel_error(neo4j_session: Session) -> None: match="Error for relationship property 'caption_align' with provided input 'banana'. Reason: Input should be 'top', 'center' or 'bottom'", ): from_neo4j(graph) + + +@pytest.mark.requires_neo4j_and_gds +def test_from_neo4j_graph_driver(neo4j_session: Session, neo4j_driver: Driver) -> None: + graph = neo4j_session.run("MATCH (a:_CI_A|_CI_B)-[r]->(b) RETURN a, b, r ORDER BY a").graph() + + # Note that this test requires an empty Neo4j database, as it just fetches everything + VG = from_neo4j(neo4j_driver) + + sorted_nodes: list[neo4j.graph.Node] = sorted(graph.nodes, key=lambda x: dict(x.items())["name"]) + node_ids: list[str] = [node.element_id for node in sorted_nodes] + + expected_nodes = [ + Node( + id=node_ids[0], + caption="_CI_A", + properties=dict( + labels=["_CI_A"], + name="Alice", + height=20, + id=42, + _id=1337, + caption="hello", + ), + ), + Node( + id=node_ids[1], + caption="_CI_A:_CI_B", + size=11, + properties=dict( + labels=["_CI_A", "_CI_B"], + name="Bob", + height=10, + id=84, + __labels=[1, 2], + ), + ), + ] + + assert len(VG.nodes) == 2 + assert sorted(VG.nodes, key=lambda x: x.properties["name"]) == expected_nodes + + assert len(VG.relationships) == 2 + vg_rels = sorted([(e.source, e.target, e.caption) for e in VG.relationships], key=lambda x: x[2] if x[2] else "foo") + assert vg_rels == [ + (node_ids[0], node_ids[1], 
"KNOWS"), + (node_ids[1], node_ids[0], "RELATED"), + ] From 3507e42a579b364b9d1c38a6c05e44348f7110b2 Mon Sep 17 00:00:00 2001 From: Adam Schill Collberg Date: Mon, 16 Jun 2025 15:17:14 +0200 Subject: [PATCH 2/3] Warn when limiting with `from_neo4j` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Florentin Dörre --- python-wrapper/src/neo4j_viz/neo4j.py | 10 ++++++++++ python-wrapper/tests/test_neo4j.py | 16 ++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/python-wrapper/src/neo4j_viz/neo4j.py b/python-wrapper/src/neo4j_viz/neo4j.py index 4473d33..3d789ad 100644 --- a/python-wrapper/src/neo4j_viz/neo4j.py +++ b/python-wrapper/src/neo4j_viz/neo4j.py @@ -1,5 +1,6 @@ from __future__ import annotations +import warnings from typing import Optional, Union import neo4j.graph @@ -59,6 +60,15 @@ def from_neo4j( elif isinstance(data, neo4j.graph.Graph): graph = data elif isinstance(data, Driver): + rel_count = data.execute_query( + "MATCH ()-[r]->() RETURN count(r) as count", + routing_=RoutingControl.READ, + result_transformer_=Result.single, + ).get("count") # type: ignore[union-attr] + if rel_count > row_limit: + warnings.warn( + f"Database relationship count ({rel_count}) exceeds `row_limit` ({row_limit}), so limiting will be applied. 
Increase the `row_limit` if needed" + ) graph = data.execute_query( f"MATCH (n)-[r]->(m) RETURN n,r,m LIMIT {row_limit}", routing_=RoutingControl.READ, diff --git a/python-wrapper/tests/test_neo4j.py b/python-wrapper/tests/test_neo4j.py index 3b38fb1..d46be65 100644 --- a/python-wrapper/tests/test_neo4j.py +++ b/python-wrapper/tests/test_neo4j.py @@ -1,3 +1,4 @@ +import re from typing import Generator import neo4j @@ -249,3 +250,18 @@ def test_from_neo4j_graph_driver(neo4j_session: Session, neo4j_driver: Driver) - (node_ids[0], node_ids[1], "KNOWS"), (node_ids[1], node_ids[0], "RELATED"), ] + + +@pytest.mark.requires_neo4j_and_gds +def test_from_neo4j_graph_row_limit_warning(neo4j_session: Session, neo4j_driver: Driver) -> None: + neo4j_session.run("MATCH (a:_CI_A|_CI_B)-[r]->(b) RETURN a, b, r ORDER BY a").graph() + + with pytest.warns( + UserWarning, + match=re.escape( + "Database relationship count (2) exceeds `row_limit` (1), so limiting will be applied. Increase the `row_limit` if needed" + ), + ): + VG = from_neo4j(neo4j_driver, row_limit=1) + + assert len(VG.relationships) == 1 From a7c6cbf1c1e820400cbec1c80ddb60b79bcf0979 Mon Sep 17 00:00:00 2001 From: Adam Schill Collberg Date: Mon, 16 Jun 2025 15:33:03 +0200 Subject: [PATCH 3/3] Warn when doing sampling in `from_gds` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Florentin Dörre --- python-wrapper/src/neo4j_viz/gds.py | 4 ++++ python-wrapper/tests/test_gds.py | 9 ++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/python-wrapper/src/neo4j_viz/gds.py b/python-wrapper/src/neo4j_viz/gds.py index 3d70343..338de75 100644 --- a/python-wrapper/src/neo4j_viz/gds.py +++ b/python-wrapper/src/neo4j_viz/gds.py @@ -1,5 +1,6 @@ from __future__ import annotations +import warnings from itertools import chain from typing import Optional from uuid import uuid4 @@ -99,6 +100,9 @@ def from_gds( node_count = G.node_count() if node_count > 
max_node_count: + warnings.warn( + f"The '{G.name()}' projection's node count ({G.node_count()}) exceeds `max_node_count` ({max_node_count}), so subsampling will be applied. Increase `max_node_count` if needed" + ) sampling_ratio = float(max_node_count) / node_count sample_name = f"neo4j-viz_sample_{uuid4()}" G_fetched, _ = gds.graph.sample.rwr(sample_name, G, samplingRatio=sampling_ratio, nodeLabelStratification=True) diff --git a/python-wrapper/tests/test_gds.py b/python-wrapper/tests/test_gds.py index ac9b015..fda1caf 100644 --- a/python-wrapper/tests/test_gds.py +++ b/python-wrapper/tests/test_gds.py @@ -1,3 +1,4 @@ +import re from typing import Any import pandas as pd @@ -267,7 +268,13 @@ def test_from_gds_sample(gds: Any) -> None: from neo4j_viz.gds import from_gds with gds.graph.generate("hello", node_count=11_000, average_degree=1) as G: - VG = from_gds(gds, G) + with pytest.warns( + UserWarning, + match=re.escape( + "The 'hello' projection's node count (11000) exceeds `max_node_count` (10000), so subsampling will be applied. Increase `max_node_count` if needed" + ), + ): + VG = from_gds(gds, G) assert len(VG.nodes) >= 9_500 assert len(VG.nodes) <= 10_500