Skip to content

Commit 5a396d0

Browse files
authored
Merge pull request #283 from CODEX-CELIDA/interval-ratio-removed-criterion-combination
Interval ratio removed criterion combination
2 parents cbd44c8 + 65e815c commit 5a396d0

32 files changed

+2781
-2801
lines changed

execution_engine/execution_engine.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,23 @@ def execute_recommendation(
434434
assert start_datetime.tzinfo, "start_datetime must be timezone-aware"
435435
assert end_datetime.tzinfo, "end_datetime must be timezone-aware"
436436

437+
# We cut off microseconds to ensure second-level precision.
438+
# In Python, timestamps are usually floored when cast to int,
439+
# but in PostgreSQL they might be rounded. To avoid subtle bugs
440+
# where a 1-second difference appears due to different rounding
441+
# strategies, we explicitly strip microseconds and assert that
442+
# the timestamp is exactly an integer.
443+
start_datetime = start_datetime.replace(microsecond=0)
444+
end_datetime = end_datetime.replace(microsecond=0)
445+
446+
assert start_datetime.timestamp() == int(
447+
start_datetime.timestamp()
448+
), f"start_datetime still contains sub-second precision: {start_datetime}"
449+
450+
assert end_datetime.timestamp() == int(
451+
end_datetime.timestamp()
452+
), f"end_datetime still contains sub-second precision: {end_datetime}"
453+
437454
date_format = "%Y-%m-%d %H:%M:%S %z"
438455

439456
logging.info(

execution_engine/execution_graph/graph.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,10 @@ def traverse(
104104

105105
traverse(expr, category=category)
106106

107+
# make sure the base node still has the correct category - it might have changed during traversal if the base node
108+
# is used in an interval_criterion of a TemporalCount operator
109+
graph.nodes[base_node].update({"category": CohortCategory.BASE})
110+
107111
if hash(expr) != expr_hash:
108112
raise ValueError("Expression has been modified during traversal")
109113

execution_engine/omop/criterion/abstract.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@
2727
from execution_engine.util.interval import IntervalType
2828
from execution_engine.util.serializable import SerializableDataClassABC
2929
from execution_engine.util.sql import SelectInto, select_into
30-
from execution_engine.util.types import PersonIntervals, TimeRange
30+
from execution_engine.util.types import PersonIntervals
31+
from execution_engine.util.types.timerange import TimeRange
3132

3233
__all__ = [
3334
"Criterion",

execution_engine/omop/criterion/point_in_time.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
from execution_engine.omop.criterion.concept import ConceptCriterion
1111
from execution_engine.task.process import get_processing_module
1212
from execution_engine.util.interval import IntervalType
13-
from execution_engine.util.types import PersonIntervals, TimeRange, Timing
13+
from execution_engine.util.types import PersonIntervals, Timing
14+
from execution_engine.util.types.timerange import TimeRange
1415
from execution_engine.util.value import Value
1516

1617
process = get_processing_module()

execution_engine/omop/db/celida/tables.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@
2727
from execution_engine.util.interval import IntervalType
2828

2929
# Use the "public" schema so that tables in different schemas can
30-
# these enums easily without introducing depedencies between the
31-
# respective schemas. Note that replicate the enum definitions in each
30+
# use these enums easily without introducing dependencies between the
31+
# respective schemas. Note that replicating the enum definitions in each
3232
# schema would not work when data must be exchanged between the
3333
# schemas because enum definitions in separate schemas, even if
3434
# identical in terms of enum values, are considered distinct and
@@ -37,7 +37,6 @@
3737
CohortCategoryEnum = Enum(CohortCategory, name="cohort_category", schema="public")
3838

3939

40-
4140
class Recommendation(Base): # noqa: D101
4241
__tablename__ = "recommendation"
4342
__table_args__ = {"schema": SCHEMA_NAME}
@@ -170,7 +169,9 @@ class ResultInterval(Base): # noqa: D101
170169
interval_start: Mapped[datetime]
171170
interval_end: Mapped[datetime]
172171
interval_type = mapped_column(IntervalTypeEnum)
173-
172+
interval_ratio: Mapped[float] = mapped_column(
173+
nullable=True
174+
)
174175
execution_run: Mapped["ExecutionRun"] = relationship(
175176
primaryjoin="ResultInterval.run_id == ExecutionRun.run_id",
176177
)

execution_engine/omop/db/celida/views.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ def interval_result_view() -> Select:
199199
rri.c.interval_type,
200200
rri.c.interval_start,
201201
rri.c.interval_end,
202+
rri.c.interval_ratio,
202203
)
203204
.select_from(rri)
204205
.outerjoin(pip, (rri.c.pi_pair_id == pip.c.pi_pair_id))

execution_engine/omop/sqlclient.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,35 @@ def _enable_database_triggers(
5858
cursor.close()
5959

6060

61+
def datetime_cols_to_epoch(stmt: sqlalchemy.Select) -> sqlalchemy.Select:
    """
    Replace datetime interval columns in a Select with integer epoch timestamps.

    Given a SQLAlchemy 2.0 Select that has columns labeled 'interval_start'
    or 'interval_end', replace those column expressions with
    EXTRACT(EPOCH FROM <expr>)::BIGINT so they become integer timestamps.
    All other columns are passed through unchanged.

    Args:
        stmt: The Select statement whose columns should be rewritten.

    Returns:
        A new Select object with the replaced columns.
    """
    new_columns = []

    for col in stmt.selected_columns:
        # Use a default so column expressions without a name are passed
        # through unchanged instead of raising AttributeError.
        label = getattr(col, "name", None)

        if label in ("interval_start", "interval_end"):
            # Wrap col in EXTRACT(EPOCH FROM col)::BIGINT, keeping the
            # original label so downstream consumers see the same column name.
            new_col = (
                sqlalchemy.func.extract("epoch", col)
                .cast(sqlalchemy.BigInteger)
                .label(label)
            )
            new_columns.append(new_col)
        else:
            new_columns.append(col)

    # maintain_column_froms preserves the original FROM clause even though
    # the column list is replaced.
    return stmt.with_only_columns(*new_columns, maintain_column_froms=True)
88+
89+
6190
class OMOPSQLClient:
6291
"""A client for the OMOP SQL database.
6392

execution_engine/task/process/__init__.py

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
import sys
44
import types
55
from collections import namedtuple
6+
from typing import TypeVar
7+
8+
from execution_engine.util.interval import IntervalType
9+
from execution_engine.util.types.timerange import TimeRange
610

711

812
def get_processing_module(
@@ -39,6 +43,35 @@ def get_processing_module(
3943

4044
Interval = namedtuple("Interval", ["lower", "upper", "type"])
4145
IntervalWithCount = namedtuple("IntervalWithCount", ["lower", "upper", "type", "count"])
42-
IntervalWithTypeCounts = namedtuple(
43-
"IntervalWithTypeCounts", ["lower", "upper", "counts"]
44-
)
46+
47+
AnyInterval = Interval | IntervalWithCount
48+
GeneralizedInterval = None | AnyInterval
49+
50+
TInterval = TypeVar("TInterval", bound=AnyInterval)
51+
52+
53+
def interval_like(interval: TInterval, start: int, end: int) -> TInterval:
54+
"""
55+
Return a copy of the given interval with its lower and upper bounds replaced.
56+
57+
Args:
58+
interval (I): The interval to copy. Must be one of Interval or IntervalWithCount.
59+
start (datetime): The new lower bound.
60+
end (datetime): The new upper bound.
61+
62+
Returns:
63+
I: A copy of the interval with updated lower and upper bounds.
64+
"""
65+
66+
return interval._replace(lower=start, upper=end) # type: ignore[return-value]
67+
68+
69+
def timerange_to_interval(tr: TimeRange, type_: IntervalType) -> Interval:
70+
"""
71+
Converts a timerange to an interval with the supplied type.
72+
"""
73+
return Interval(
74+
lower=int(tr.start.timestamp()),
75+
upper=int(tr.end.timestamp()),
76+
type=type_,
77+
)

0 commit comments

Comments
 (0)