Skip to content

Commit 2c067b4

Browse files
authored
fix(telemetry): group traces when using agent as tool in an agent, fixed instrumentation bug (#493)
1 parent 9faadbf commit 2c067b4

File tree

3 files changed, with 81 additions and 77 deletions.

src/strands/agent/agent.py

Lines changed: 19 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
from concurrent.futures import ThreadPoolExecutor
1717
from typing import Any, AsyncGenerator, AsyncIterator, Callable, Mapping, Optional, Type, TypeVar, Union, cast
1818

19-
from opentelemetry import trace
19+
from opentelemetry import trace as trace_api
2020
from pydantic import BaseModel
2121

2222
from ..event_loop.event_loop import event_loop_cycle, run_tool
@@ -298,7 +298,7 @@ def __init__(
298298

299299
# Initialize tracer instance (no-op if not configured)
300300
self.tracer = get_tracer()
301-
self.trace_span: Optional[trace.Span] = None
301+
self.trace_span: Optional[trace_api.Span] = None
302302

303303
# Initialize agent state management
304304
if state is not None:
@@ -501,24 +501,24 @@ async def stream_async(self, prompt: Union[str, list[ContentBlock]], **kwargs: A
501501
content: list[ContentBlock] = [{"text": prompt}] if isinstance(prompt, str) else prompt
502502
message: Message = {"role": "user", "content": content}
503503

504-
self._start_agent_trace_span(message)
504+
self.trace_span = self._start_agent_trace_span(message)
505+
with trace_api.use_span(self.trace_span):
506+
try:
507+
events = self._run_loop(message, invocation_state=kwargs)
508+
async for event in events:
509+
if "callback" in event:
510+
callback_handler(**event["callback"])
511+
yield event["callback"]
505512

506-
try:
507-
events = self._run_loop(message, invocation_state=kwargs)
508-
async for event in events:
509-
if "callback" in event:
510-
callback_handler(**event["callback"])
511-
yield event["callback"]
513+
result = AgentResult(*event["stop"])
514+
callback_handler(result=result)
515+
yield {"result": result}
512516

513-
result = AgentResult(*event["stop"])
514-
callback_handler(result=result)
515-
yield {"result": result}
517+
self._end_agent_trace_span(response=result)
516518

517-
self._end_agent_trace_span(response=result)
518-
519-
except Exception as e:
520-
self._end_agent_trace_span(error=e)
521-
raise
519+
except Exception as e:
520+
self._end_agent_trace_span(error=e)
521+
raise
522522

523523
async def _run_loop(
524524
self, message: Message, invocation_state: dict[str, Any]
@@ -650,15 +650,14 @@ def _record_tool_execution(
650650
self._append_message(tool_result_msg)
651651
self._append_message(assistant_msg)
652652

653-
def _start_agent_trace_span(self, message: Message) -> None:
653+
def _start_agent_trace_span(self, message: Message) -> trace_api.Span:
654654
"""Starts a trace span for the agent.
655655
656656
Args:
657657
message: The user message.
658658
"""
659659
model_id = self.model.config.get("model_id") if hasattr(self.model, "config") else None
660-
661-
self.trace_span = self.tracer.start_agent_span(
660+
return self.tracer.start_agent_span(
662661
message=message,
663662
agent_name=self.name,
664663
model_id=model_id,

src/strands/event_loop/event_loop.py

Lines changed: 60 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,8 @@
1313
import uuid
1414
from typing import TYPE_CHECKING, Any, AsyncGenerator, cast
1515

16+
from opentelemetry import trace as trace_api
17+
1618
from ..experimental.hooks import (
1719
AfterModelInvocationEvent,
1820
AfterToolInvocationEvent,
@@ -114,72 +116,75 @@ async def event_loop_cycle(agent: "Agent", invocation_state: dict[str, Any]) ->
114116
parent_span=cycle_span,
115117
model_id=model_id,
116118
)
117-
118-
tool_specs = agent.tool_registry.get_all_tool_specs()
119-
120-
agent.hooks.invoke_callbacks(
121-
BeforeModelInvocationEvent(
122-
agent=agent,
123-
)
124-
)
125-
126-
try:
127-
# TODO: To maintain backwards compatibility, we need to combine the stream event with invocation_state
128-
# before yielding to the callback handler. This will be revisited when migrating to strongly
129-
# typed events.
130-
async for event in stream_messages(agent.model, agent.system_prompt, agent.messages, tool_specs):
131-
if "callback" in event:
132-
yield {
133-
"callback": {**event["callback"], **(invocation_state if "delta" in event["callback"] else {})}
134-
}
135-
136-
stop_reason, message, usage, metrics = event["stop"]
137-
invocation_state.setdefault("request_state", {})
119+
with trace_api.use_span(model_invoke_span):
120+
tool_specs = agent.tool_registry.get_all_tool_specs()
138121

139122
agent.hooks.invoke_callbacks(
140-
AfterModelInvocationEvent(
123+
BeforeModelInvocationEvent(
141124
agent=agent,
142-
stop_response=AfterModelInvocationEvent.ModelStopResponse(
143-
stop_reason=stop_reason,
144-
message=message,
145-
),
146125
)
147126
)
148127

149-
if model_invoke_span:
150-
tracer.end_model_invoke_span(model_invoke_span, message, usage, stop_reason)
151-
break # Success! Break out of retry loop
152-
153-
except Exception as e:
154-
if model_invoke_span:
155-
tracer.end_span_with_error(model_invoke_span, str(e), e)
156-
157-
agent.hooks.invoke_callbacks(
158-
AfterModelInvocationEvent(
159-
agent=agent,
160-
exception=e,
128+
try:
129+
# TODO: To maintain backwards compatibility, we need to combine the stream event with invocation_state
130+
# before yielding to the callback handler. This will be revisited when migrating to strongly
131+
# typed events.
132+
async for event in stream_messages(agent.model, agent.system_prompt, agent.messages, tool_specs):
133+
if "callback" in event:
134+
yield {
135+
"callback": {
136+
**event["callback"],
137+
**(invocation_state if "delta" in event["callback"] else {}),
138+
}
139+
}
140+
141+
stop_reason, message, usage, metrics = event["stop"]
142+
invocation_state.setdefault("request_state", {})
143+
144+
agent.hooks.invoke_callbacks(
145+
AfterModelInvocationEvent(
146+
agent=agent,
147+
stop_response=AfterModelInvocationEvent.ModelStopResponse(
148+
stop_reason=stop_reason,
149+
message=message,
150+
),
151+
)
161152
)
162-
)
163153

164-
if isinstance(e, ModelThrottledException):
165-
if attempt + 1 == MAX_ATTEMPTS:
166-
yield {"callback": {"force_stop": True, "force_stop_reason": str(e)}}
167-
raise e
154+
if model_invoke_span:
155+
tracer.end_model_invoke_span(model_invoke_span, message, usage, stop_reason)
156+
break # Success! Break out of retry loop
168157

169-
logger.debug(
170-
"retry_delay_seconds=<%s>, max_attempts=<%s>, current_attempt=<%s> "
171-
"| throttling exception encountered "
172-
"| delaying before next retry",
173-
current_delay,
174-
MAX_ATTEMPTS,
175-
attempt + 1,
158+
except Exception as e:
159+
if model_invoke_span:
160+
tracer.end_span_with_error(model_invoke_span, str(e), e)
161+
162+
agent.hooks.invoke_callbacks(
163+
AfterModelInvocationEvent(
164+
agent=agent,
165+
exception=e,
166+
)
176167
)
177-
time.sleep(current_delay)
178-
current_delay = min(current_delay * 2, MAX_DELAY)
179168

180-
yield {"callback": {"event_loop_throttled_delay": current_delay, **invocation_state}}
181-
else:
182-
raise e
169+
if isinstance(e, ModelThrottledException):
170+
if attempt + 1 == MAX_ATTEMPTS:
171+
yield {"callback": {"force_stop": True, "force_stop_reason": str(e)}}
172+
raise e
173+
174+
logger.debug(
175+
"retry_delay_seconds=<%s>, max_attempts=<%s>, current_attempt=<%s> "
176+
"| throttling exception encountered "
177+
"| delaying before next retry",
178+
current_delay,
179+
MAX_ATTEMPTS,
180+
attempt + 1,
181+
)
182+
time.sleep(current_delay)
183+
current_delay = min(current_delay * 2, MAX_DELAY)
184+
185+
yield {"callback": {"event_loop_throttled_delay": current_delay, **invocation_state}}
186+
else:
187+
raise e
183188

184189
try:
185190
# Add message in trace and mark the end of the stream messages trace

src/strands/telemetry/tracer.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -213,7 +213,7 @@ def start_model_invoke_span(
213213
parent_span: Optional[Span] = None,
214214
model_id: Optional[str] = None,
215215
**kwargs: Any,
216-
) -> Optional[Span]:
216+
) -> Span:
217217
"""Start a new span for a model invocation.
218218
219219
Args:
@@ -414,7 +414,7 @@ def start_agent_span(
414414
tools: Optional[list] = None,
415415
custom_trace_attributes: Optional[Mapping[str, AttributeValue]] = None,
416416
**kwargs: Any,
417-
) -> Optional[Span]:
417+
) -> Span:
418418
"""Start a new span for an agent invocation.
419419
420420
Args:

0 commit comments

Comments
 (0)