
Commit 367da69

ehhuang and Eric Huang (AI Platform) authored
feat: Sync updates from stainless branch: ehhuang/dev (#149)
# What does this PR do?

## Test Plan

LLAMA_STACK_CONFIG=fireworks pytest -s -v tests/client-sdk/ --safety-shield meta-llama/Llama-Guard-3-8B

Co-authored-by: Eric Huang (AI Platform) <[email protected]>
1 parent ad6ffc6 commit 367da69

File tree

9 files changed: 185 additions & 93 deletions


src/llama_stack_client/resources/batch_inference.py

Lines changed: 2 additions & 2 deletions
@@ -60,7 +60,7 @@ def chat_completion(
         logprobs: batch_inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN,
         response_format: ResponseFormat | NotGiven = NOT_GIVEN,
         sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
-        tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN,
+        tool_choice: Literal["auto", "required", "none"] | NotGiven = NOT_GIVEN,
         tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN,
         tools: Iterable[batch_inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -183,7 +183,7 @@ async def chat_completion(
         logprobs: batch_inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN,
         response_format: ResponseFormat | NotGiven = NOT_GIVEN,
         sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
-        tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN,
+        tool_choice: Literal["auto", "required", "none"] | NotGiven = NOT_GIVEN,
         tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN,
         tools: Iterable[batch_inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
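In practice the widened literal lets a batch request opt out of tool use entirely. A minimal sketch, assuming a locally running Llama Stack server; the `model` and `messages_batch` argument names and the model id are assumptions, since only the parameters in the hunks above are confirmed by this diff:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")  # assumed local server

# tool_choice="none" is newly accepted; previously only "auto" and "required"
# passed the Literal type check.
response = client.batch_inference.chat_completion(
    model="meta-llama/Llama-3.1-8B-Instruct",  # hypothetical model id
    messages_batch=[[{"role": "user", "content": "Hello"}]],  # assumed parameter name
    tool_choice="none",
)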

src/llama_stack_client/resources/inference.py

Lines changed: 8 additions & 8 deletions
@@ -70,7 +70,7 @@ def chat_completion(
         response_format: ResponseFormat | NotGiven = NOT_GIVEN,
         sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
         stream: Literal[False] | NotGiven = NOT_GIVEN,
-        tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN,
+        tool_choice: Literal["auto", "required", "none"] | NotGiven = NOT_GIVEN,
         tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN,
         tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN,
         tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN,
@@ -138,7 +138,7 @@ def chat_completion(
         logprobs: inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN,
         response_format: ResponseFormat | NotGiven = NOT_GIVEN,
         sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
-        tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN,
+        tool_choice: Literal["auto", "required", "none"] | NotGiven = NOT_GIVEN,
         tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN,
         tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN,
         tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN,
@@ -206,7 +206,7 @@ def chat_completion(
         logprobs: inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN,
         response_format: ResponseFormat | NotGiven = NOT_GIVEN,
         sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
-        tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN,
+        tool_choice: Literal["auto", "required", "none"] | NotGiven = NOT_GIVEN,
         tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN,
         tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN,
         tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN,
@@ -274,7 +274,7 @@ def chat_completion(
         response_format: ResponseFormat | NotGiven = NOT_GIVEN,
         sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
         stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
-        tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN,
+        tool_choice: Literal["auto", "required", "none"] | NotGiven = NOT_GIVEN,
         tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN,
         tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN,
         tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN,
@@ -567,7 +567,7 @@ async def chat_completion(
         response_format: ResponseFormat | NotGiven = NOT_GIVEN,
         sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
         stream: Literal[False] | NotGiven = NOT_GIVEN,
-        tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN,
+        tool_choice: Literal["auto", "required", "none"] | NotGiven = NOT_GIVEN,
         tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN,
         tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN,
         tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN,
@@ -635,7 +635,7 @@ async def chat_completion(
         logprobs: inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN,
         response_format: ResponseFormat | NotGiven = NOT_GIVEN,
         sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
-        tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN,
+        tool_choice: Literal["auto", "required", "none"] | NotGiven = NOT_GIVEN,
         tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN,
         tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN,
         tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN,
@@ -703,7 +703,7 @@ async def chat_completion(
         logprobs: inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN,
         response_format: ResponseFormat | NotGiven = NOT_GIVEN,
         sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
-        tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN,
+        tool_choice: Literal["auto", "required", "none"] | NotGiven = NOT_GIVEN,
         tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN,
         tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN,
         tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN,
@@ -771,7 +771,7 @@ async def chat_completion(
         response_format: ResponseFormat | NotGiven = NOT_GIVEN,
         sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
         stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
-        tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN,
+        tool_choice: Literal["auto", "required", "none"] | NotGiven = NOT_GIVEN,
         tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN,
         tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN,
         tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN,
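The three-value literal lands in all eight chat_completion overloads (sync and async, streaming and non-streaming). A hedged sketch of a non-streaming call; `model_id` and `messages` follow the SDK's public signature (not shown in these hunks), and the model id and server URL are illustrative:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")  # assumed local server

# "none" asks the model not to call tools even when tools are configured.
response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",  # illustrative model id
    messages=[{"role": "user", "content": "What is 2 + 2?"}],
    tool_choice="none",
)
print(response.completion_message.content)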

src/llama_stack_client/resources/telemetry.py

Lines changed: 54 additions & 54 deletions
@@ -118,20 +118,20 @@ def get_span_tree(
         """
         if not span_id:
             raise ValueError(f"Expected a non-empty value for `span_id` but received {span_id!r}")
-        return self._get(
+        return self._post(
             f"/v1/telemetry/spans/{span_id}/tree",
+            body=maybe_transform(
+                {
+                    "attributes_to_return": attributes_to_return,
+                    "max_depth": max_depth,
+                },
+                telemetry_get_span_tree_params.TelemetryGetSpanTreeParams,
+            ),
             options=make_request_options(
                 extra_headers=extra_headers,
                 extra_query=extra_query,
                 extra_body=extra_body,
                 timeout=timeout,
-                query=maybe_transform(
-                    {
-                        "attributes_to_return": attributes_to_return,
-                        "max_depth": max_depth,
-                    },
-                    telemetry_get_span_tree_params.TelemetryGetSpanTreeParams,
-                ),
                 post_parser=DataWrapper[TelemetryGetSpanTreeResponse]._unwrapper,
             ),
             cast_to=cast(Type[TelemetryGetSpanTreeResponse], DataWrapper[TelemetryGetSpanTreeResponse]),
@@ -229,21 +229,21 @@ def query_spans(

          timeout: Override the client-level default timeout for this request, in seconds
         """
-        return self._get(
+        return self._post(
             "/v1/telemetry/spans",
+            body=maybe_transform(
+                {
+                    "attribute_filters": attribute_filters,
+                    "attributes_to_return": attributes_to_return,
+                    "max_depth": max_depth,
+                },
+                telemetry_query_spans_params.TelemetryQuerySpansParams,
+            ),
             options=make_request_options(
                 extra_headers=extra_headers,
                 extra_query=extra_query,
                 extra_body=extra_body,
                 timeout=timeout,
-                query=maybe_transform(
-                    {
-                        "attribute_filters": attribute_filters,
-                        "attributes_to_return": attributes_to_return,
-                        "max_depth": max_depth,
-                    },
-                    telemetry_query_spans_params.TelemetryQuerySpansParams,
-                ),
                 post_parser=DataWrapper[TelemetryQuerySpansResponse]._unwrapper,
             ),
             cast_to=cast(Type[TelemetryQuerySpansResponse], DataWrapper[TelemetryQuerySpansResponse]),
@@ -273,22 +273,22 @@ def query_traces(

          timeout: Override the client-level default timeout for this request, in seconds
         """
-        return self._get(
+        return self._post(
             "/v1/telemetry/traces",
+            body=maybe_transform(
+                {
+                    "attribute_filters": attribute_filters,
+                    "limit": limit,
+                    "offset": offset,
+                    "order_by": order_by,
+                },
+                telemetry_query_traces_params.TelemetryQueryTracesParams,
+            ),
             options=make_request_options(
                 extra_headers=extra_headers,
                 extra_query=extra_query,
                 extra_body=extra_body,
                 timeout=timeout,
-                query=maybe_transform(
-                    {
-                        "attribute_filters": attribute_filters,
-                        "limit": limit,
-                        "offset": offset,
-                        "order_by": order_by,
-                    },
-                    telemetry_query_traces_params.TelemetryQueryTracesParams,
-                ),
                 post_parser=DataWrapper[TelemetryQueryTracesResponse]._unwrapper,
             ),
             cast_to=cast(Type[TelemetryQueryTracesResponse], DataWrapper[TelemetryQueryTracesResponse]),
@@ -416,20 +416,20 @@ async def get_span_tree(
         """
         if not span_id:
             raise ValueError(f"Expected a non-empty value for `span_id` but received {span_id!r}")
-        return await self._get(
+        return await self._post(
             f"/v1/telemetry/spans/{span_id}/tree",
+            body=await async_maybe_transform(
+                {
+                    "attributes_to_return": attributes_to_return,
+                    "max_depth": max_depth,
+                },
+                telemetry_get_span_tree_params.TelemetryGetSpanTreeParams,
+            ),
             options=make_request_options(
                 extra_headers=extra_headers,
                 extra_query=extra_query,
                 extra_body=extra_body,
                 timeout=timeout,
-                query=await async_maybe_transform(
-                    {
-                        "attributes_to_return": attributes_to_return,
-                        "max_depth": max_depth,
-                    },
-                    telemetry_get_span_tree_params.TelemetryGetSpanTreeParams,
-                ),
                 post_parser=DataWrapper[TelemetryGetSpanTreeResponse]._unwrapper,
             ),
             cast_to=cast(Type[TelemetryGetSpanTreeResponse], DataWrapper[TelemetryGetSpanTreeResponse]),
@@ -527,21 +527,21 @@ async def query_spans(

          timeout: Override the client-level default timeout for this request, in seconds
         """
-        return await self._get(
+        return await self._post(
             "/v1/telemetry/spans",
+            body=await async_maybe_transform(
+                {
+                    "attribute_filters": attribute_filters,
+                    "attributes_to_return": attributes_to_return,
+                    "max_depth": max_depth,
+                },
+                telemetry_query_spans_params.TelemetryQuerySpansParams,
+            ),
             options=make_request_options(
                 extra_headers=extra_headers,
                 extra_query=extra_query,
                 extra_body=extra_body,
                 timeout=timeout,
-                query=await async_maybe_transform(
-                    {
-                        "attribute_filters": attribute_filters,
-                        "attributes_to_return": attributes_to_return,
-                        "max_depth": max_depth,
-                    },
-                    telemetry_query_spans_params.TelemetryQuerySpansParams,
-                ),
                 post_parser=DataWrapper[TelemetryQuerySpansResponse]._unwrapper,
             ),
             cast_to=cast(Type[TelemetryQuerySpansResponse], DataWrapper[TelemetryQuerySpansResponse]),
@@ -571,22 +571,22 @@ async def query_traces(

          timeout: Override the client-level default timeout for this request, in seconds
         """
-        return await self._get(
+        return await self._post(
             "/v1/telemetry/traces",
+            body=await async_maybe_transform(
+                {
+                    "attribute_filters": attribute_filters,
+                    "limit": limit,
+                    "offset": offset,
+                    "order_by": order_by,
+                },
+                telemetry_query_traces_params.TelemetryQueryTracesParams,
+            ),
             options=make_request_options(
                 extra_headers=extra_headers,
                 extra_query=extra_query,
                 extra_body=extra_body,
                 timeout=timeout,
-                query=await async_maybe_transform(
-                    {
-                        "attribute_filters": attribute_filters,
-                        "limit": limit,
-                        "offset": offset,
-                        "order_by": order_by,
-                    },
-                    telemetry_query_traces_params.TelemetryQueryTracesParams,
-                ),
                 post_parser=DataWrapper[TelemetryQueryTracesResponse]._unwrapper,
             ),
             cast_to=cast(Type[TelemetryQueryTracesResponse], DataWrapper[TelemetryQueryTracesResponse]),
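Call sites are unaffected by the GET-to-POST switch: the Python signatures stay the same, and the arguments simply travel as a JSON body instead of URL query parameters, which sidesteps URL-length limits when filter lists grow large (presumably the motivation). A hedged sketch of an unchanged call; the key/op/value condition shape and the `order_by` string format are assumptions:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")  # assumed local server

# Identical to the pre-change call site; only the wire format differs
# (POST /v1/telemetry/traces with a JSON body after this commit).
traces = client.telemetry.query_traces(
    attribute_filters=[{"key": "session_id", "op": "eq", "value": "abc-123"}],  # assumed condition shape
    limit=10,
    order_by=["start_time"],  # assumed format
)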

src/llama_stack_client/types/agents/turn_create_params.py

Lines changed: 8 additions & 7 deletions
@@ -21,7 +21,7 @@
     "DocumentContentURL",
     "ToolConfig",
     "Toolgroup",
-    "ToolgroupUnionMember1",
+    "ToolgroupAgentToolGroupWithArgs",
     "TurnCreateParamsNonStreaming",
     "TurnCreateParamsStreaming",
 ]
@@ -95,7 +95,7 @@ class Document(TypedDict, total=False):


 class ToolConfig(TypedDict, total=False):
-    system_message_behavior: Required[Literal["append", "replace"]]
+    system_message_behavior: Literal["append", "replace"]
     """(Optional) Config for how to override the default system prompt.

     - `SystemMessageBehavior.append`: Appends the provided system message to the
@@ -105,10 +105,11 @@ class ToolConfig(TypedDict, total=False):
       definitions should be inserted.
     """

-    tool_choice: Literal["auto", "required"]
-    """(Optional) Whether tool use is required or automatic.
+    tool_choice: Union[Literal["auto", "required", "none"], str]
+    """(Optional) Whether tool use is automatic, required, or none.

-    Defaults to ToolChoice.auto.
+    Can also specify a tool name to use a specific tool. Defaults to
+    ToolChoice.auto.
     """

     tool_prompt_format: Literal["json", "function_tag", "python_list"]
@@ -122,13 +123,13 @@ class ToolConfig(TypedDict, total=False):
     """


-class ToolgroupUnionMember1(TypedDict, total=False):
+class ToolgroupAgentToolGroupWithArgs(TypedDict, total=False):
     args: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]

     name: Required[str]


-Toolgroup: TypeAlias = Union[str, ToolgroupUnionMember1]
+Toolgroup: TypeAlias = Union[str, ToolgroupAgentToolGroupWithArgs]


 class TurnCreateParamsNonStreaming(TurnCreateParamsBase, total=False):
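Both changes surface when constructing agent turns. A minimal sketch using only the shapes defined in the hunks above; the toolgroup and tool names are hypothetical:

from llama_stack_client.types.agents.turn_create_params import ToolConfig, Toolgroup

# A bare string still works; the dict form is the renamed
# ToolgroupAgentToolGroupWithArgs, with `name` and `args` required.
simple_group: Toolgroup = "builtin::web_search"  # hypothetical toolgroup name
with_args: Toolgroup = {"name": "builtin::rag", "args": {"top_k": 3}}  # hypothetical

# tool_choice now also accepts an arbitrary tool name, not just the literals,
# and system_message_behavior is no longer Required.
config: ToolConfig = {"tool_choice": "web_search", "system_message_behavior": "append"}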

src/llama_stack_client/types/batch_inference_chat_completion_params.py

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ class BatchInferenceChatCompletionParams(TypedDict, total=False):

     sampling_params: SamplingParams

-    tool_choice: Literal["auto", "required"]
+    tool_choice: Literal["auto", "required", "none"]
     """Whether tool use is required or automatic.

     This is a hint to the model which may not be followed. It depends on the

src/llama_stack_client/types/inference_chat_completion_params.py

Lines changed: 6 additions & 5 deletions
@@ -49,7 +49,7 @@ class InferenceChatCompletionParamsBase(TypedDict, total=False):
     sampling_params: SamplingParams
     """Parameters to control the sampling strategy"""

-    tool_choice: Literal["auto", "required"]
+    tool_choice: Literal["auto", "required", "none"]
     """(Optional) Whether tool use is required or automatic.

     Defaults to ToolChoice.auto. .. deprecated:: Use tool_config instead.
@@ -79,7 +79,7 @@ class Logprobs(TypedDict, total=False):


 class ToolConfig(TypedDict, total=False):
-    system_message_behavior: Required[Literal["append", "replace"]]
+    system_message_behavior: Literal["append", "replace"]
     """(Optional) Config for how to override the default system prompt.

     - `SystemMessageBehavior.append`: Appends the provided system message to the
@@ -89,10 +89,11 @@ class ToolConfig(TypedDict, total=False):
       definitions should be inserted.
     """

-    tool_choice: Literal["auto", "required"]
-    """(Optional) Whether tool use is required or automatic.
+    tool_choice: Union[Literal["auto", "required", "none"], str]
+    """(Optional) Whether tool use is automatic, required, or none.

-    Defaults to ToolChoice.auto.
+    Can also specify a tool name to use a specific tool. Defaults to
+    ToolChoice.auto.
     """

     tool_prompt_format: Literal["json", "function_tag", "python_list"]
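The same ToolConfig relaxation applies to plain inference calls through this module. A short sketch of the new shapes; the field values are illustrative:

from llama_stack_client.types.inference_chat_completion_params import ToolConfig

# system_message_behavior is optional now, and tool_choice may pin a specific
# tool by name in addition to "auto" / "required" / "none".
pinned: ToolConfig = {"tool_choice": "get_weather"}  # hypothetical tool name
disabled: ToolConfig = {"tool_choice": "none", "tool_prompt_format": "json"}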
