refactor: fix window_types problem without reset callback

scymtym · scymtym · commit 89efdbde684f · 2025-03-25T17:26:31.000+01:00
And add test.
diff --git a/execution_engine/task/process/rectangle.py b/execution_engine/task/process/rectangle.py
@@ -517,7 +517,7 @@ def create_time_intervals(
     end_time: datetime.time,
     interval_type: IntervalType,
     timezone: pytz.tzinfo.DstTzInfo | str,
-) -> tuple[Interval, ...]:
+) -> list[Interval]:
     """
     Constructs a list of time intervals within a specified date range, each defined by daily start and end times.
 
@@ -654,8 +654,7 @@ def add_interval(
         # Move to the next day
         current_date += datetime.timedelta(days=1)
 
-    # use a tuple for windows to make sure it is immutable (and can be shared by all persons)
-    return tuple(intervals)
+    return intervals
 
 
 def find_overlapping_personal_windows(
@@ -697,7 +696,6 @@ def find_rectangles(
     data: list[PersonIntervals],
     interval_constructor: Callable,
     is_same_result: Callable | None = None,
-    reset: Callable | None = None,
 ) -> PersonIntervals:
     """
     Iterates over intervals for each person across all items in `data` and constructs new intervals
@@ -717,21 +715,11 @@ def find_rectangles(
         keys: Set[int] = set()
         for track in data:
             keys |= track.keys()
-        result = {}
-
-        for key in keys:
-
-            if reset:
-                reset()
-
-            intervals_for_person: list[list[Interval]] = [
-                intervals.get(key, []) for intervals in data
-            ]
-            intervals = _impl.find_rectangles(
-                intervals_for_person,
+        return {
+            key: _impl.find_rectangles(
+                [intervals.get(key, []) for intervals in data],
                 interval_constructor,
                 is_same_result=is_same_result,
             )
-            result[key] = intervals
-
-        return result
+            for key in keys
+        }
diff --git a/execution_engine/task/task.py b/execution_engine/task/task.py
@@ -1,4 +1,5 @@
 import base64
+import copy
 import datetime
 import json
 import logging
@@ -655,19 +656,13 @@ def get_start_end_from_interval_type(
                 )
 
             # Incrementally compute the interval type for each window
-            # interval.
-            window_types: dict[AnyInterval, IntervalType] = (
-                dict()
-            )  # window interval -> interval type
-
-            # todo: @moringenj - is this additional function really a good solution?
-            def reset_window_types() -> None:
-                window_types.clear()
+            # interval. Maps id() of the window interval object -> interval type
+            window_types: dict[int, IntervalType] = dict()
 
             def update_window_type(
                 window_interval: AnyInterval, data_interval: AnyInterval
             ) -> IntervalType:
-                window_type = window_types.get(window_interval.lower, None)
+                window_type = window_types.get(id(window_interval), None)
 
                 if data_interval is None or data_interval.type is IntervalType.NEGATIVE:
                     if window_type is not IntervalType.POSITIVE:
@@ -681,7 +676,7 @@ def update_window_type(
                     assert data_interval.type is IntervalType.NO_DATA
                     if window_type is None:
                         window_type = IntervalType.NO_DATA
-                window_types[window_interval.lower] = window_type
+                window_types[id(window_interval)] = window_type
 
                 return window_type
 
@@ -690,7 +685,7 @@ def update_window_type(
             # result interval window types based on the data
             # intervals.
             def is_same_interval(
-                left_intervals: tuple[AnyInterval], right_intervals: tuple[AnyInterval]
+                left_intervals: List[AnyInterval], right_intervals: List[AnyInterval]
             ) -> bool:
                 left_window_interval, left_data_interval = left_intervals
                 right_window_interval, right_data_interval = right_intervals
@@ -723,17 +718,22 @@ def result_interval(
                 ):
                     return Interval(start, end, IntervalType.NOT_APPLICABLE)
                 else:
-                    window_type = window_types.get(window_interval.lower, None)
+                    window_type = window_types.get(id(window_interval), None)
                     if window_type is None:
                         window_type = update_window_type(window_interval, data_interval)
                     return Interval(start, end, window_type)
 
-            person_indicator_windows = {key: indicator_windows for key in data_p.keys()}
+            # Make separate copies of the intervals for each person so
+            # that the object identity of each interval is unique and
+            # can be used as a dictionary key.
+            person_indicator_windows = {
+                key: [ copy.copy(window) for window in indicator_windows ]
+                for key in data_p.keys()
+            }
             result = process.find_rectangles(
                 [person_indicator_windows, data_p],
                 result_interval,
                 is_same_result=is_same_interval,
-                reset=reset_window_types,
             )
 
         return result
diff --git a/tests/execution_engine/omop/criterion/combination/test_temporal_combination.py b/tests/execution_engine/omop/criterion/combination/test_temporal_combination.py
@@ -2038,3 +2038,146 @@ def test_interval_ratio_on_database(
                 result_tuples, expected[person.person_id]
             ):
                 assert result_tuple == expected_tuple
+
+class TestIndicatorWindowsMulitplePatients(TestCriterionCombinationDatabase):
+    """
+    This test ensures that the TemporalCount operator processes each
+    person independently within a PersonIntervals data set.
+
+    This is mostly a regression test since at one point the exact
+    problem of cross-talk between the data structures for different
+    persons caused the operator to return incorrect results.
+    """
+
+    @pytest.fixture
+    def observation_window(self) -> TimeRange:
+        return TimeRange(
+            name="observation", start="2025-02-18 14:55:00+01:00", end="2025-02-22 12:00:00+01:00"
+        )
+
+    def patient_events(self, db_session, visit_occurrence):
+        person_id = visit_occurrence.person_id
+        events = []
+        c1 = create_condition(
+            vo=visit_occurrence,
+            condition_concept_id=concept_covid19.concept_id,
+            condition_start_datetime=pendulum.parse("2025-02-19 08:00:00+01:00"),
+            condition_end_datetime=pendulum.parse("2025-02-21 02:00:00+01:00"),
+        )
+        events.append(c1)
+        if person_id == 1:
+            e1 = create_procedure(
+                vo=visit_occurrence,
+                procedure_concept_id=concept_delir_screening.concept_id,
+                start_datetime=pendulum.parse("2025-02-19 18:00:00+01:00"),
+                end_datetime=pendulum.parse("2025-02-19 18:01:00+01:00"),
+            )
+            events.append(e1)
+        db_session.add_all(events)
+        db_session.commit()
+
+    @pytest.mark.parametrize(
+        "population,intervention,expected",
+        [
+            (
+                logic.And(c2),  # population
+                temporal_logic_util.Day(criterion=delir_screening),
+                {
+                    1: [
+                        (
+                            IntervalType.NOT_APPLICABLE,
+                            pendulum.parse("2025-02-18 17:55:00+01:00"),
+                            pendulum.parse("2025-02-19 07:59:59+01:00"),
+                        ),
+                        (
+                            IntervalType.POSITIVE,
+                            pendulum.parse("2025-02-19 08:00:00+01:00"),
+                            pendulum.parse("2025-02-19 23:59:59+01:00"),
+                        ),
+                        (
+                            IntervalType.NEGATIVE,
+                            pendulum.parse("2025-02-20 00:00:00+01:00"),
+                            pendulum.parse("2025-02-21 02:00:00+01:00"),
+                        ),
+                        (
+                            IntervalType.NOT_APPLICABLE,
+                            pendulum.parse("2025-02-21 02:00:01+01:00"),
+                            pendulum.parse("2025-02-22 05:30:00+01:00"),
+                        ),
+                    ],
+                    2: [
+                        (
+                            IntervalType.NOT_APPLICABLE,
+                            pendulum.parse("2025-02-18 17:55:00+01:00"),
+                            pendulum.parse("2025-02-19 07:59:59+01:00"),
+                        ),
+                        # If cross-talk between the data structures
+                        # for different persons occurs, parts of the
+                        # following interval may turn positive because
+                        # of the results for the first person.
+                        (
+                            IntervalType.NEGATIVE,
+                            pendulum.parse("2025-02-19 08:00:00+01:00"),
+                            pendulum.parse("2025-02-21 02:00:00+01:00"),
+                        ),
+                        (
+                            IntervalType.NOT_APPLICABLE,
+                            pendulum.parse("2025-02-21 02:00:01+01:00"),
+                            pendulum.parse("2025-02-22 05:30:00+01:00"),
+                        ),
+                    ],
+                },
+            ),
+        ],
+    )
+    def test_multiple_patients_on_database(
+        self,
+        person,
+        db_session,
+        population,
+        intervention,
+        base_criterion,
+        expected,
+        observation_window,
+        criteria,
+    ):
+        persons = person[:2]
+        vos = []
+        for person in persons:
+            visit = create_visit(
+                person_id=person.person_id,
+                visit_start_datetime=observation_window.start
+                + datetime.timedelta(hours=3),
+                visit_end_datetime=observation_window.end
+                - datetime.timedelta(hours=6.5),
+                visit_concept_id=concepts.INTENSIVE_CARE,
+            )
+            vos.append(visit)
+            self.patient_events(db_session, visit)
+
+        db_session.add_all(vos)
+        db_session.commit()
+
+        self.insert_expression(
+            db_session, population, intervention, base_criterion, observation_window
+        )
+
+        df = self.fetch_interval_result(
+            db_session,
+            pi_pair_id=self.pi_pair_id,
+            criterion_id=None,
+            category=CohortCategory.POPULATION_INTERVENTION,
+        )
+
+        for person in persons:
+            result = df.query(f"person_id=={person.person_id}")
+            result_tuples = list(
+                result[ [ "interval_type", "interval_start", "interval_end" ] ]
+                .fillna("nan")
+                .itertuples(index=False, name=None)
+            )
+
+            for result_tuple, expected_tuple in zip(
+                result_tuples, expected[person.person_id]
+            ):
+                assert result_tuple == expected_tuple