Merge pull request #1560 from mvdbeek/fix_subworkflow_invocation_tracking

mvdbeek · web-flow · commit f949ece2d014 · 2025-08-18T10:02:10.000+02:00
Fix subworkflow invocation tracking
diff --git a/planemo/galaxy/activity.py b/planemo/galaxy/activity.py
@@ -45,6 +45,7 @@
 from planemo.galaxy.invocations.api import (
     BioblendInvocationApi,
     JOB_ERROR_STATES,
+    NON_TERMINAL_JOB_STATES,
 )
 from planemo.galaxy.invocations.polling import PollingTrackerImpl
 from planemo.galaxy.invocations.polling import wait_for_invocation_and_jobs as polling_wait_for_invocation_and_jobs
@@ -866,9 +867,8 @@ def get_state():
         if not response:
             # invocation may not have any attached jobs, that's fine
             return "ok"
-        non_terminal_states = {"running", "queued", "new", "ready", "resubmitted", "upload", "waiting"}
         current_states = set(item["state"] for item in response)
-        current_non_terminal_states = non_terminal_states.intersection(current_states)
+        current_non_terminal_states = NON_TERMINAL_JOB_STATES.intersection(current_states)
         # Mix of "error"-ish terminal job, dataset, invocation terminal states, so we can use this for whatever we throw at it
         hierarchical_fail_states = [
             "error",
diff --git a/planemo/galaxy/invocations/api.py b/planemo/galaxy/invocations/api.py
@@ -77,3 +77,4 @@ def invocation_state_terminal(state: str):
 
 
 JOB_ERROR_STATES = ["error", "deleted", "failed", "stopped", "stop", "deleting"]
+NON_TERMINAL_JOB_STATES = {"running", "queued", "new", "ready", "resubmitted", "upload", "waiting"}
diff --git a/planemo/galaxy/invocations/polling.py b/planemo/galaxy/invocations/polling.py
@@ -10,6 +10,7 @@
     InvocationApi,
     InvocationJobsSummary,
     JOB_ERROR_STATES,
+    NON_TERMINAL_JOB_STATES,
 )
 from .progress import WorkflowProgressDisplay
 
@@ -101,7 +102,7 @@ def _check_for_errors(
     )
     if error_message:
         final_state = "new" if not invocation else invocation["state"]
-        job_state = summary_job_state(invocation_jobs)
+        job_state = summary_job_state(invocation_jobs, fail_fast)
         return final_state, job_state, error_message
     return None
 
@@ -154,24 +155,25 @@ def wait_for_invocation_and_jobs(
             ctx, invocation_id, invocation_api, workflow_progress_display, fail_fast
         )
 
-        error_result = _check_for_errors(
-            ctx,
-            invocation_id,
-            sub_exception,
-            sub_invocation,
-            sub_jobs,
-            invocation_api,
-            workflow_progress_display,
-            fail_fast,
-        )
-        if error_result:
-            return error_result
+        if sub_invocation:
+            error_result = _check_for_errors(
+                ctx,
+                sub_invocation["id"] if sub_invocation else invocation_id,
+                sub_exception,
+                sub_invocation,
+                sub_jobs,
+                invocation_api,
+                workflow_progress_display,
+                fail_fast,
+            )
+            if error_result:
+                return error_result
 
         if not _is_polling_complete(workflow_progress_display):
             polling_tracker.sleep()
 
     ctx.vlog(f"The final state of all jobs and subworkflow invocations for invocation [{invocation_id}] is 'ok'")
-    job_state = summary_job_state(last_invocation_jobs)
+    job_state = summary_job_state(last_invocation_jobs, fail_fast)
     assert last_invocation
 
     # Final check for job errors when fail_fast is enabled
@@ -203,7 +205,7 @@ def workflow_in_error_message(
     """Return an error message if workflow is in an error state."""
 
     invocation_state = "new" if not last_invocation else last_invocation["state"]
-    job_state = summary_job_state(last_invocation_jobs)
+    job_state = summary_job_state(last_invocation_jobs, fail_fast)
 
     error_message = None
     if last_exception:
@@ -233,14 +235,19 @@ def workflow_in_error_message(
     return error_message
 
 
-# we're still mocking out the old history state by just picking out a random
-# job state of interest. Seems like we should drop this.
-def summary_job_state(job_states_summary: Optional[InvocationJobsSummary]):
-    states = (job_states_summary or {"states": {}}).get("states", {}).copy()
-    states.pop("ok", None)
-    states.pop("skipped", None)
+def summary_job_state(job_states_summary: Optional[InvocationJobsSummary], fail_fast: bool = False):
+    states = {state for state in (job_states_summary or {"states": {}})["states"]}
+    if not fail_fast:
+        current_non_terminal_states = NON_TERMINAL_JOB_STATES.intersection(states)
+        if current_non_terminal_states:
+            # Jobs are still in a non-terminal state: report one of those; error states are only considered afterwards.
+            return next(iter(current_non_terminal_states))
     if states:
-        return next(iter(states.keys()))
+        # No non-terminal job states remain (or fail_fast is set); report an error state in the summary if there is one.
+        for error_state in JOB_ERROR_STATES:
+            if error_state in states:
+                return error_state
+        return next(iter(states))
     else:
         return "ok"
 
diff --git a/planemo/galaxy/invocations/progress.py b/planemo/galaxy/invocations/progress.py
@@ -29,6 +29,7 @@
 
 # Types for various invocation responses
 class InvocationStep(TypedDict, total=False):
+    id: str
     state: Optional[str]
     subworkflow_invocation_id: Optional[str]
 
@@ -347,6 +348,7 @@ def __init__(
         self.subworkflow_invocation_ids_seen: Set[str] = set()
         self.subworkflow_invocation_ids_completed: Set[str] = set()
         self.subworkflow_invocation_id: Optional[str] = None
+        self.new_steps: List[str] = []
         self.invocation_id = invocation_id
         display = display_configuration or DisplayConfiguration()
         self.galaxy_url = galaxy_url
@@ -358,10 +360,14 @@ def __init__(
     def _register_subworkflow_invocation_ids_from(self, invocation: Invocation):
         subworkflow_invocation_ids: List[str] = []
         steps = invocation.get("steps") or []
+        new_steps: List[str] = []
         for step in steps:
+            if step["state"] == "new":
+                new_steps.append(step["id"])
             subworkflow_invocation_id = step.get("subworkflow_invocation_id")
             if subworkflow_invocation_id:
                 subworkflow_invocation_ids.append(subworkflow_invocation_id)
+        self.new_steps = new_steps
         self._register_subworkflow_invocation_ids(subworkflow_invocation_ids)
 
     def _register_subworkflow_invocation_ids(self, ids: List[str]):
@@ -375,6 +381,9 @@ def an_incomplete_subworkflow_id(self):
         return random.choice(tuple(self.subworkflow_invocation_ids_seen - self.subworkflow_invocation_ids_completed))
 
     def all_subworkflows_complete(self):
+        if self.new_steps:
+            # Steps in state "new" don't have subworkflow invocation ids yet, so we can't know if all subworkflows are complete
+            return False
         return len(self.subworkflow_invocation_ids_seen) == len(self.subworkflow_invocation_ids_completed)
 
     def get_invocation_ui_link(self):
diff --git a/tests/test_invocation_polling.py b/tests/test_invocation_polling.py
@@ -25,6 +25,7 @@
     SCENARIO_1,
     SCENARIO_MULTIPLE_OK_SUBWORKFLOWS,
     SCENARIO_NESTED_SUBWORKFLOWS,
+    SCENARIO_SUBWORKFLOW_WITH_FAILED_JOBS,
 )
 
 SLEEP = 0
@@ -42,10 +43,10 @@ def sleep(self) -> None:
 
 def test_polling_scenario_1():
     final_invocation_state, job_state, error_message = run_workflow_simulation(SCENARIO_1, fail_fast=True)
-    assert final_invocation_state == "scheduled"
-    assert job_state == "failed"
+    assert final_invocation_state == "ready"  # early job error and fail fast, invocation doesn't advance to scheduled
+    assert job_state == "error"
     assert error_message
-    assert "failed" in error_message
+    assert "error" in error_message
 
 
 def test_polling_scenario_three_ok_subworkflows():
@@ -80,10 +81,10 @@ def test_polling_without_display():
         display,
         fail_fast=True,
     )
-    assert final_invocation_state == "scheduled"
-    assert job_state == "failed"
+    assert final_invocation_state == "ready"
+    assert job_state == "error"
     assert error_message
-    assert "failed" in error_message
+    assert "error" in error_message
 
 
 def test_polling_with_compact_display():
@@ -117,19 +118,19 @@ def test_fail_fast_enabled_with_job_failure():
     """Test that fail_fast=True returns error when a job fails."""
     final_invocation_state, job_state, error_message = run_workflow_simulation(SCENARIO_1, fail_fast=True)
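-    # Invocation should still be scheduled (workflow scheduling succeeded)
+    # Early job error with fail_fast: polling stops while the invocation is still "ready", before it reaches "scheduled"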
-    assert final_invocation_state == "scheduled"
-    assert job_state == "failed"
+    assert final_invocation_state == "ready"
+    assert job_state == "error"
     # fail_fast should detect the failed job and return error message
     assert error_message
-    assert "Failed to run workflow, at least one job is in [failed] state." in error_message
+    assert "Failed to run workflow, at least one job is in [error] state." in error_message
 
 
 def test_fail_fast_disabled_with_job_failure():
     """Test that fail_fast=False does not report job failures as errors."""
     final_invocation_state, job_state, error_message = run_workflow_simulation(SCENARIO_1, fail_fast=False)
     # Invocation should be scheduled (workflow scheduling succeeded)
     assert final_invocation_state == "scheduled"
-    assert job_state == "failed"
+    assert job_state == "error"
     # Without fail_fast, job failures shouldn't cause error messages
     # (unless invocation itself fails, which it doesn't in this case)
     assert error_message is None
@@ -145,6 +146,19 @@ def test_fail_fast_enabled_with_successful_workflow():
     assert not error_message
 
 
+def test_fail_fast_enabled_with_subworkflow_job_failure():
+    """Test that fail_fast=True terminates when encountering jobs that are errored inside a subworkflow invocation."""
+    final_invocation_state, job_state, error_message = run_workflow_simulation(
+        SCENARIO_SUBWORKFLOW_WITH_FAILED_JOBS, fail_fast=True
+    )
+    # Invocation is ready to schedule more steps, yet the polling should terminate
+    assert final_invocation_state == "ready"
+    assert job_state == "error"
+    # fail_fast should detect the failed job in the subworkflow and return error message
+    assert error_message
+    assert "Failed to run workflow, at least one job is in [error] state." in error_message
+
+
 def run_workflow_simulation(
     yaml_str: str, display_configuration: Optional[DisplayConfiguration] = None, fail_fast: bool = False
 ):
@@ -179,7 +193,7 @@ def show_job(self, job_id, full_details=False):
         """Return mock job details with exit code and stderr."""
         return {
             "id": job_id,
-            "state": "failed",
+            "state": "error",
             "exit_code": 1,
             "stderr": f"Error: Mock job {job_id} failed with exit code 1\nAdditional error details here",
             "stdout": f"Mock job {job_id} output",
@@ -204,7 +218,7 @@ class MockInvocationsApi:
 
     def show_invocation_step(self, invocation_id, step_id):
         """Return mock invocation step details."""
-        return {"id": step_id, "jobs": [{"id": f"job_{step_id}", "state": "failed"}]}
+        return {"id": step_id, "jobs": [{"id": f"job_{step_id}", "state": "error"}]}
 
 
 class SimulatedApi(InvocationApi):
@@ -239,7 +253,7 @@ def get_job(self, job_id: str, full_details: bool = False) -> Job:
         """Return mock job details."""
         return {
             "id": job_id,
-            "state": "failed",
+            "state": "error",
             "exit_code": 1,
             "stderr": f"Error: Mock job {job_id} failed with exit code 1\nAdditional error details here",
             "stdout": f"Mock job {job_id} output",
diff --git a/tests/test_workflow_progress.py b/tests/test_workflow_progress.py
@@ -6,8 +6,8 @@
 )
 from planemo.galaxy.invocations.progress_display import DisplayConfiguration
 
-STEP_NEW = {"state": "new"}
-STEP_SCHEDULED = {"state": "scheduled"}
+STEP_NEW = {"state": "new", "id": "1"}
+STEP_SCHEDULED = {"state": "scheduled", "id": "1"}
 SLEEP = 0.8
 
 
diff --git a/tests/test_workflow_simulation.py b/tests/test_workflow_simulation.py
@@ -1,5 +1,23 @@
 from planemo.galaxy.invocations.simulations import parse_workflow_simulation_from_string
 
+# These scenarios simulate workflow execution timelines with the following structure:
+#
+# Field explanations:
+# - states: Array defining the main workflow invocation's state progression over time
+#   Format: [initial_state, intermediate_state:duration, final_state]
+#   - "new": Workflow just created
+#   - "ready:4": Workflow in ready state for 4 time ticks
+#   - "scheduled": Workflow is running/scheduled
+#
+# - steps: Array of workflow steps that execute in sequence
+#   - Each step has a "state" and may contain "jobs" or nested "invocation" (subworkflow)
+#   - "after: N": This step starts after N time ticks
+#
+# - jobs: Array of jobs within a step
+#   - states: Job state progression over time
+#   - Common states: new -> queued -> running -> ok/error
+#   - Format with duration: "queued:2" means job stays queued for 2 ticks
+
 SCENARIO_1 = """
 states: [new, ready:4, scheduled]
 steps:
@@ -9,7 +27,7 @@
 - after: 2
   state: scheduled
   jobs:
-  - states: [new, queued, failed]
+  - states: [new, queued, error]
   - states: [new, queued, ok]
 - after: 3
   state: scheduled
@@ -157,6 +175,29 @@
 """
 
 
+SCENARIO_SUBWORKFLOW_WITH_FAILED_JOBS = """
+states: [new, ready:4]
+steps:
+- state: scheduled
+  jobs:
+  - states: [new, queued:2, running:2, ok]
+- after: 2
+  state: scheduled
+  jobs:
+  - states: [new, queued, running:2, ok]
+  - states: [new, queued, running:4, ok]
+- after: 3
+  state: scheduled
+  invocation:
+    states: [new, ready]
+    steps:
+    - state: scheduled
+      jobs:
+        - states: [new, queued, error]
+        - states: [new:2, paused]
+"""
+
+
 def test_parse_scenario_1_invocation_state_evolution():
     invocation = parse_workflow_simulation_from_string(SCENARIO_1)
     invocation_dict = invocation.get_api_invocation()
@@ -227,7 +268,7 @@ def test_parse_scenario_1_invocation_job_states():
     assert len(states) == 3
     assert states["ok"] == 1
     assert states["running"] == 1
-    assert states["failed"] == 1
+    assert states["error"] == 1
 
 
 def test_parse_scenario_1_subworkflow_invocation_state():

Original file line number	Diff line number	Diff line change
`@@ -77,3 +77,4 @@ def invocation_state_terminal(state: str):`
`77`	`77`
`78`	`78`
`79`	`79`	`JOB_ERROR_STATES = ["error", "deleted", "failed", "stopped", "stop", "deleting"]`
	`80`	`+NON_TERMINAL_JOB_STATES = {"running", "queued", "new", "ready", "resubmitted", "upload", "waiting"}`
Original file line number	Diff line number	Diff line change
`@@ -6,8 +6,8 @@`
`6`	`6`	`)`
`7`	`7`	`from planemo.galaxy.invocations.progress_display import DisplayConfiguration`
`8`	`8`
`9`		`-STEP_NEW = {"state": "new"}`
`10`		`-STEP_SCHEDULED = {"state": "scheduled"}`
	`9`	`+STEP_NEW = {"state": "new", "id": "1"}`
	`10`	`+STEP_SCHEDULED = {"state": "scheduled", "id": "1"}`
`11`	`11`	`SLEEP = 0.8`
`12`	`12`
`13`	`13`